diff --git a/inventory/group_vars/datagrepper b/inventory/group_vars/datagrepper
index 3d8f2c30da..bb12864e2f 100644
--- a/inventory/group_vars/datagrepper
+++ b/inventory/group_vars/datagrepper
@@ -1,2 +1,20 @@
 ---
+# Define resources for this group of hosts here.
+lvm_size: 20000
+mem_size: 2048
+num_cpus: 2
+
+# For systems that do not match the above, specify the same parameter in
+# the host_vars/$hostname file.
+
+tcp_ports: [80, 443, 6996]
+# Needed for rsync from log02 for logs.
+custom_rules:
+  - '-A INPUT -p tcp -m tcp -s 10.5.126.29 --dport 873 -j ACCEPT'
+  - '-A INPUT -p tcp -m tcp -s 192.168.1.56 --dport 873 -j ACCEPT'
+
+# NOTE(review): "sysadmin-datenommer" may be a typo for "sysadmin-datanommer"
+# (the rest of this change spells it "datanommer") -- confirm the FAS group.
+fas_client_groups: sysadmin-noc,sysadmin-datenommer,fi-apprentice
+
 freezes: false
diff --git a/playbooks/groups/datagrepper.yml b/playbooks/groups/datagrepper.yml
new file mode 100644
index 0000000000..e855efe252
--- /dev/null
+++ b/playbooks/groups/datagrepper.yml
@@ -0,0 +1,59 @@
+---
+# Create a new datagrepper server.
+#
+# Play 1 runs the instance-creation task files with fact gathering disabled;
+# play 2 gathers facts and applies the roles and common task files.
+
+- name: make datagrepper server
+  hosts: datagrepper;datagrepper-stg
+  user: root
+  gather_facts: false
+
+  vars_files:
+    - /srv/web/infra/ansible/vars/global.yml
+    - "{{ private }}/vars.yml"
+    - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml
+
+  tasks:
+    - include: "{{ tasks }}/virt_instance_create.yml"
+    - include: "{{ tasks }}/accelerate_prep.yml"
+
+  handlers:
+    - include: "{{ handlers }}/restart_services.yml"
+
+- name: make the box be real
+  hosts: datagrepper;datagrepper-stg
+  user: root
+  gather_facts: true
+  accelerate: "{{ accelerated }}"
+
+  vars_files:
+    - /srv/web/infra/ansible/vars/global.yml
+    - "{{ private }}/vars.yml"
+    - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml
+
+  roles:
+    - base
+    - rkhunter
+    - denyhosts
+    - nagios_client
+    - fas_client
+    - collectd/base
+    - fedmsg/base
+    - rsyncd
+    - sudo
+    - datagrepper
+
+  tasks:
+    - include: "{{ tasks }}/hosts.yml"
+    - include: "{{ tasks }}/yumrepos.yml"
+    - include: "{{ tasks }}/2fa_client.yml"
+    - include: "{{ tasks }}/motd.yml"
+    # The VPN client is only set up outside of staging.
+    - include: "{{ tasks }}/openvpn_client.yml"
+      when: env != "staging"
+    - include: "{{ tasks }}/apache.yml"
+    - include: "{{ tasks }}/mod_wsgi.yml"
+
+  handlers:
+    - include: "{{ handlers }}/restart_services.yml"
diff --git a/roles/datagrepper/files/datagrepper-app.conf b/roles/datagrepper/files/datagrepper-app.conf
new file mode 100644
index 0000000000..eeeb66bdf7
--- /dev/null
+++ b/roles/datagrepper/files/datagrepper-app.conf
@@ -0,0 +1,40 @@
+LoadModule expires_module modules/mod_expires.so
+LoadModule headers_module modules/mod_headers.so
+LoadModule deflate_module modules/mod_deflate.so
+
+ExpiresActive On
+#ExpiresDefault "access plus 300 seconds"
+
+ErrorLog logs/datagrepper_error_log
+CustomLog logs/datagrepper_access_log combined
+
+AddOutputFilterByType DEFLATE text/html text/plain text/xml text/javascript text/css application/x-javascript
+
+# This caching may not necessarily be ideal, or even correct.
+# However, it was the only way I could get firebug to show me 302's for
+# my ToscaWidget resources.
+
+ Header unset Cache-Control
+ Header unset Etag
+ Header add Cache-Control "max-age=2592000"
+ #ExpiresDefault A2592000
+
+
+# Static resources for the datagrepper app.
+Alias /datagrepper/css /usr/lib/python2.6/site-packages/datagrepper/static/css
+# Job runner output dir.
+Alias /datagrepper/output /var/cache/datagrepper
+
+WSGIDaemonProcess datagrepper user=fedmsg group=fedmsg maximum-requests=50000 display-name=datagrepper processes=8 threads=4 inactivity-timeout=300
+WSGISocketPrefix run/wsgi
+WSGIRestrictStdout Off
+WSGIRestrictSignal Off
+WSGIPythonOptimize 1
+
+WSGIScriptAlias /datagrepper /usr/share/datagrepper/apache/datagrepper.wsgi
+
+
+ WSGIProcessGroup datagrepper
+ Order deny,allow
+ Allow from all
+
diff --git a/roles/datagrepper/files/datagrepper-fedmsg.py b/roles/datagrepper/files/datagrepper-fedmsg.py
new file mode 100644
index 0000000000..87af8e4a73
--- /dev/null
+++ b/roles/datagrepper/files/datagrepper-fedmsg.py
@@ -0,0 +1,14 @@
+# Configuration for the datagrepper webapp.
+config = {
+ # We don't actually want to run the datanommer consumer on this machine.
+ 'datanommer.enabled': False,
+
+ # Note that this is connecting to db02. That's fine for now, but we want to
+ # move the db for datanommer to a whole other db host in the future. We
+ # expect the amount of data it generates to grow pretty steadily over time
+ # and we don't want *read* operations on that database to slow down all our
+ # other apps.
+ 'datanommer.sqlalchemy.url': 'postgresql://{{ datanommerDBUser }}:{{ datanommerDBPassword }}@db-datanommer/datanommer',
+ 'fedmsg.consumers.datagrepper-runner.enabled': True,
+}
+
diff --git a/roles/datagrepper/files/datagrepper.cfg b/roles/datagrepper/files/datagrepper.cfg
new file mode 100644
index 0000000000..0776e32076
--- /dev/null
+++ b/roles/datagrepper/files/datagrepper.cfg
@@ -0,0 +1,37 @@
+from datetime import timedelta
+
+### Secret key for the Flask application
+SECRET_KEY = '{{ datagrepperCookieSecret }}'
+
+### Unhappy mako
+MAKO_OUTPUT_ENCODING='utf-8'
+
+{% if environment == "staging" %}
+DATAGREPPER_BASE_URL='https://apps.stg.fedoraproject.org/datagrepper/'
+{% else %}
+DATAGREPPER_BASE_URL='https://apps.fedoraproject.org/datagrepper/'
+{% endif %}
+
+DATAGREPPER_CACHE_BACKEND='dogpile.cache.memcached'
+{% if environment == "staging" %}
+DATAGREPPER_CACHE_KWARGS={'arguments': {'url': 'app01.stg.phx2.fedoraproject.org:11211'}}
+{% else %}
+DATAGREPPER_CACHE_KWARGS={'arguments': {'url': ['memcached03.phx2.fedoraproject.org:11211', 'memcached04.phx2.fedoraproject.org:11211']}}
+{% endif %}
+
+{% if environment == "staging" %}
+SQLALCHEMY_DATABASE_URI='postgresql+psycopg2://{{ datagrepper_app_user }}:{{ datagrepper_app_password }}@db02.stg.phx2.fedoraproject.org:5432/datagrepper'
+{% else %}
+SQLALCHEMY_DATABASE_URI='postgresql+psycopg2://{{ datagrepper_app_user }}:{{ datagrepper_app_password }}@db01.phx2.fedoraproject.org:5432/datagrepper'
+{% endif %}
+
+{% if environment == "staging" %}
+DATAGREPPER_OPENID_ENDPOINT='id.stg.fedoraproject.org'
+{% else %}
+DATAGREPPER_OPENID_ENDPOINT='id.fedoraproject.org'
+{% endif %}
+
+RUNNER_LOCKFILE='/var/run/fedmsg/datagrepper.lock'
+JOB_OUTPUT_DIR='/var/cache/datagrepper'
+JOB_OUTPUT_URL='https://apps.{% if environment == "staging" %}stg.{% endif %}fedoraproject.org/datagrepper/output'
+JOB_EXPIRY=timedelta(days=7)
diff --git a/roles/datagrepper/files/fedmsg-hub b/roles/datagrepper/files/fedmsg-hub
new file mode 100644
index 0000000000..e92b4c31dc
--- /dev/null
+++ b/roles/datagrepper/files/fedmsg-hub
@@ -0,0 +1,12 @@
+#!/usr/bin/python
+# Managed by ansible: roles/datagrepper/files/fedmsg-hub
+__requires__ = ['fedmsg', 'SQLAlchemy >= 0.7', 'jinja2 >= 2.4']
+import os
+import sys
+from pkg_resources import load_entry_point
+os.environ['DATAGREPPER_CONFIG'] = '/etc/datagrepper/datagrepper.cfg'
+
+if __name__ == '__main__':
+ sys.exit(
+ load_entry_point('fedmsg', 'console_scripts', 'fedmsg-hub')()
+ )
diff --git a/roles/datagrepper/tasks/main.yml b/roles/datagrepper/tasks/main.yml
new file mode 100644
index 0000000000..e53341e822
--- /dev/null
+++ b/roles/datagrepper/tasks/main.yml
@@ -0,0 +1,78 @@
+---
+# Tasks for the datagrepper role: install the packages, lay down the
+# application/fedmsg/httpd configuration, and adjust SELinux for httpd.
+#
+# Relative "src=" paths are resolved by Ansible against the role's files/
+# directory for copy and its templates/ directory for template.
+
+- name: install datagrepper and required packages
+  yum: pkg={{ item }} state=installed
+  with_items:
+    - datagrepper
+    - python-psycopg2
+  tags:
+    - packages
+    - datagrepper
+
+- name: creating datagrepper config dir
+  file: path=/etc/datagrepper state=directory owner=root mode=755
+  tags:
+    - config
+    - datagrepper
+
+- name: setting up datagrepper config from the template
+  template: src=datagrepper.cfg dest=/etc/datagrepper/datagrepper.cfg owner=fedmsg group=fedmsg mode=640
+  tags:
+    - config
+    - datagrepper
+
+- name: setting up credential from the template
+  template: src=datagrepper-fedmsg.py dest=/etc/fedmsg.d/datagrepper.py owner=fedmsg group=fedmsg mode=640
+  tags:
+    - config
+    - datagrepper
+
+- name: installing memcached package for /topics
+  yum: pkg=python-memcached state=installed
+  tags:
+    - packages
+    - datagrepper
+
+# The httpd config contains no template expressions, so it is copied as-is.
+- name: setting up httpd config
+  copy: src=datagrepper-app.conf dest=/etc/httpd/conf.d/datagrepper.conf owner=root group=root
+  notify:
+    - restart httpd
+  tags:
+    - config
+    - datagrepper
+
+- name: adding fedmsg-hub binary
+  copy: src=fedmsg-hub dest=/usr/bin/fedmsg-hub owner=root group=root mode=755
+  notify:
+    - restart fedmsg-hub
+  tags:
+    - config
+    - datagrepper
+
+- name: ensure selinux lets httpd talk to the database
+  seboolean: name=httpd_can_network_connect_db persistent=yes state=yes
+  tags:
+    - datagrepper
+
+- name: ensure selinux lets httpd talk to the memcache
+  seboolean: name=httpd_can_network_memcache persistent=yes state=yes
+  tags:
+    - datagrepper
+
+# "semanage fcontext -a" exits non-zero when the rule already exists, so a
+# repeat run is treated as "unchanged" rather than as a failure.
+- name: adding datagrepper dir in httpd context
+  command: semanage fcontext -a -t httpd_cache_t "/var/cache/datagrepper(/.*)?"
+  register: fcontext_result
+  changed_when: fcontext_result.rc == 0
+  failed_when: fcontext_result.rc != 0 and 'already defined' not in fcontext_result.stderr
+  tags:
+    - datagrepper
+
+# selinux policy has been intentionally omitted since that is obtained from fedmsg/base