From e470c9948e80871bda10ed3073c9b6cffd4c4003 Mon Sep 17 00:00:00 2001 From: Aditya adimania Patawari Date: Tue, 8 Jul 2014 15:35:43 +0000 Subject: [PATCH] Initial port of datagrepper from puppet to ansible. https://fedorahosted.org/fedora-infrastructure/ticket/4393 --- inventory/group_vars/datagrepper | 14 +++ playbooks/groups/datagrepper.yml | 105 ++++++++++++++++++ roles/datagrepper/files/datagrepper-app.conf | 40 +++++++ roles/datagrepper/files/datagrepper-fedmsg.py | 14 +++ roles/datagrepper/files/datagrepper.cfg | 37 ++++++ roles/datagrepper/files/fedmsg-hub | 12 ++ roles/datagrepper/tasks/main.yml | 66 +++++++++++ 7 files changed, 288 insertions(+) create mode 100644 playbooks/groups/datagrepper.yml create mode 100644 roles/datagrepper/files/datagrepper-app.conf create mode 100644 roles/datagrepper/files/datagrepper-fedmsg.py create mode 100644 roles/datagrepper/files/datagrepper.cfg create mode 100644 roles/datagrepper/files/fedmsg-hub create mode 100644 roles/datagrepper/tasks/main.yml diff --git a/inventory/group_vars/datagrepper b/inventory/group_vars/datagrepper index 3d8f2c30da..bb12864e2f 100644 --- a/inventory/group_vars/datagrepper +++ b/inventory/group_vars/datagrepper @@ -1,2 +1,16 @@ --- +# Define resources for this group of hosts here. +lvm_size: 20000 +mem_size: 2048 +num_cpus: 2 + +# for systems that do not match the above - specify the same parameter in +# the host_vars/$hostname file + +tcp_ports: [ 80, 443, 6996 ] +# Needed for rsync from log02 for logs. 
+custom_rules: [ '-A INPUT -p tcp -m tcp -s 10.5.126.29 --dport 873 -j ACCEPT', '-A INPUT -p tcp -m tcp -s 192.168.1.56 --dport 873 -j ACCEPT' ] + +fas_client_groups: sysadmin-noc,sysadmin-datanommer,fi-apprentice + freezes: false diff --git a/playbooks/groups/datagrepper.yml b/playbooks/groups/datagrepper.yml new file mode 100644 index 0000000000..e855efe252 --- /dev/null +++ b/playbooks/groups/datagrepper.yml @@ -0,0 +1,105 @@ +# create a new datagrepper server + +- name: make datagrepper server + hosts: datagrepper;datagrepper-stg + user: root + gather_facts: False + + vars_files: + - /srv/web/infra/ansible/vars/global.yml + - "{{ private }}/vars.yml" + - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml + + tasks: + - include: "{{ tasks }}/virt_instance_create.yml" + - include: "{{ tasks }}/accelerate_prep.yml" + + handlers: + - include: "{{ handlers }}/restart_services.yml" + +- name: make the box be real + hosts: datagrepper;datagrepper-stg + user: root + gather_facts: True + accelerate: "{{ accelerated }}" + + vars_files: + - /srv/web/infra/ansible/vars/global.yml + - "{{ private }}/vars.yml" + - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml + + roles: + - base + - rkhunter + - denyhosts + - nagios_client + - fas_client + - collectd/base + - fedmsg/base + - rsyncd + - sudo + - datagrepper + + tasks: + - include: "{{ tasks }}/hosts.yml" + - include: "{{ tasks }}/yumrepos.yml" + - include: "{{ tasks }}/2fa_client.yml" + - include: "{{ tasks }}/motd.yml" + - include: "{{ tasks }}/openvpn_client.yml" + when: env != "staging" + - include: "{{ tasks }}/apache.yml" + - include: "{{ tasks }}/mod_wsgi.yml" + + handlers: + - include: "{{ handlers }}/restart_services.yml" + +- name: set up gluster server on prod + hosts: packages + user: root + gather_facts: True + accelerate: "{{ accelerated }}" + + vars_files: + - /srv/web/infra/ansible/vars/global.yml + - "{{ private }}/vars.yml" + - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml + 
+ roles: + - role: gluster/server + name: gluster + username: "{{ packagesglusterusername }}" + password: "{{ packagesglusterpassword }}" + owner: root + group: root + datadir: /srv/glusterfs/packages + + handlers: + - include: "{{ handlers }}/restart_services.yml" + +- name: set up gluster client on prod + hosts: packages + user: root + gather_facts: True + accelerate: "{{ accelerated }}" + + vars_files: + - /srv/web/infra/ansible/vars/global.yml + - "{{ private }}/vars.yml" + - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml + + roles: + - role: gluster/client + name: gluster + servers: + - packages03.phx2.fedoraproject.org + - packages04.phx2.fedoraproject.org + username: "{{ packagesglusterusername }}" + password: "{{ packagesglusterpassword }}" + owner: apache + group: apache + mountdir: /var/cache/fedoracommunity + + handlers: + - include: "{{ handlers }}/restart_services.yml" + + diff --git a/roles/datagrepper/files/datagrepper-app.conf b/roles/datagrepper/files/datagrepper-app.conf new file mode 100644 index 0000000000..eeeb66bdf7 --- /dev/null +++ b/roles/datagrepper/files/datagrepper-app.conf @@ -0,0 +1,40 @@ +LoadModule expires_module modules/mod_expires.so +LoadModule headers_module modules/mod_headers.so +LoadModule deflate_module modules/mod_deflate.so + +ExpiresActive On +#ExpiresDefault "access plus 300 seconds" + +ErrorLog logs/datagrepper_error_log +CustomLog logs/datagrepper_access_log combined + +AddOutputFilterByType DEFLATE text/html text/plain text/xml text/javascript text/css application/x-javascript + +# This caching may not necessarily be ideal, or even correct. +# However, it was the only way I could get firebug to show me 302's for +# my ToscaWidget resources. + + Header unset Cache-Control + Header unset Etag + Header add Cache-Control "max-age=2592000" + #ExpiresDefault A2592000 + + +# Static resources for the datagrepper app. 
+Alias /datagrepper/css /usr/lib/python2.6/site-packages/datagrepper/static/css +# Job runner output dir. +Alias /datagrepper/output /var/cache/datagrepper + +WSGIDaemonProcess datagrepper user=fedmsg group=fedmsg maximum-requests=50000 display-name=datagrepper processes=8 threads=4 inactivity-timeout=300 +WSGISocketPrefix run/wsgi +WSGIRestrictStdout Off +WSGIRestrictSignal Off +WSGIPythonOptimize 1 + +WSGIScriptAlias /datagrepper /usr/share/datagrepper/apache/datagrepper.wsgi + + + WSGIProcessGroup datagrepper + Order deny,allow + Allow from all + diff --git a/roles/datagrepper/files/datagrepper-fedmsg.py b/roles/datagrepper/files/datagrepper-fedmsg.py new file mode 100644 index 0000000000..87af8e4a73 --- /dev/null +++ b/roles/datagrepper/files/datagrepper-fedmsg.py @@ -0,0 +1,14 @@ +# Configuration for the datagrepper webapp. +config = { + # We don't actually want to run the datanommer consumer on this machine. + 'datanommer.enabled': False, + + # NOTE(review): this comment used to say "db02", but the URL below uses the + # host "db-datanommer" -- confirm which is current. The datanommer db is + # meant to move to its own db host: we expect the amount of data it + # generates to grow pretty steadily over time and we don't want *read* + # operations on that database to slow down all our other apps. 
+ 'datanommer.sqlalchemy.url': 'postgresql://{{ datanommerDBUser }}:{{ datanommerDBPassword }}@db-datanommer/datanommer', + 'fedmsg.consumers.datagrepper-runner.enabled': True, +} + diff --git a/roles/datagrepper/files/datagrepper.cfg b/roles/datagrepper/files/datagrepper.cfg new file mode 100644 index 0000000000..0776e32076 --- /dev/null +++ b/roles/datagrepper/files/datagrepper.cfg @@ -0,0 +1,37 @@ +from datetime import timedelta + +### Secret key for the Flask application +SECRET_KEY = '{{ datagrepperCookieSecret }}' + +### Unhappy mako +MAKO_OUTPUT_ENCODING='utf-8' + +{% if env == "staging" %} +DATAGREPPER_BASE_URL='https://apps.stg.fedoraproject.org/datagrepper/' +{% else %} +DATAGREPPER_BASE_URL='https://apps.fedoraproject.org/datagrepper/' +{% endif %} + +DATAGREPPER_CACHE_BACKEND='dogpile.cache.memcached' +{% if env == "staging" %} +DATAGREPPER_CACHE_KWARGS={'arguments': {'url': 'app01.stg.phx2.fedoraproject.org:11211'}} +{% else %} +DATAGREPPER_CACHE_KWARGS={'arguments': {'url': ['memcached03.phx2.fedoraproject.org:11211', 'memcached04.phx2.fedoraproject.org:11211']}} +{% endif %} + +{% if env == "staging" %} +SQLALCHEMY_DATABASE_URI='postgresql+psycopg2://{{ datagrepper_app_user }}:{{ datagrepper_app_password }}@db02.stg.phx2.fedoraproject.org:5432/datagrepper' +{% else %} +SQLALCHEMY_DATABASE_URI='postgresql+psycopg2://{{ datagrepper_app_user }}:{{ datagrepper_app_password }}@db01.phx2.fedoraproject.org:5432/datagrepper' +{% endif %} + +{% if env == "staging" %} +DATAGREPPER_OPENID_ENDPOINT='id.stg.fedoraproject.org' +{% else %} +DATAGREPPER_OPENID_ENDPOINT='id.fedoraproject.org' +{% endif %} + +RUNNER_LOCKFILE='/var/run/fedmsg/datagrepper.lock' +JOB_OUTPUT_DIR='/var/cache/datagrepper' +JOB_OUTPUT_URL='https://apps.{% if env == "staging" %}stg.{% endif %}fedoraproject.org/datagrepper/output' +JOB_EXPIRY=timedelta(days=7) diff --git a/roles/datagrepper/files/fedmsg-hub b/roles/datagrepper/files/fedmsg-hub new file 
mode 100644 index 0000000000..e92b4c31dc --- /dev/null +++ b/roles/datagrepper/files/fedmsg-hub @@ -0,0 +1,12 @@ +#!/usr/bin/python +# puppet/modules/datagrepper/templates/fedmsg-hub.erb +__requires__ = ['fedmsg', 'SQLAlchemy >= 0.7', 'jinja2 >= 2.4'] +import os +import sys +from pkg_resources import load_entry_point +os.environ['DATAGREPPER_CONFIG'] = '/etc/datagrepper/datagrepper.cfg' + +if __name__ == '__main__': + sys.exit( + load_entry_point('fedmsg', 'console_scripts', 'fedmsg-hub')() + ) diff --git a/roles/datagrepper/tasks/main.yml b/roles/datagrepper/tasks/main.yml new file mode 100644 index 0000000000..e53341e822 --- /dev/null +++ b/roles/datagrepper/tasks/main.yml @@ -0,0 +1,66 @@ +- name: install datagrepper and required packages + yum: pkg={{ item }} state=installed + with_items: + - datagrepper + - python-psycopg2 + tags: + - packages + - datagrepper + +- name: creating datagrepper config dir + file: path=/etc/datagrepper state=directory owner=root mode=755 + tags: + - config + - datagrepper + +- name: setting up datagrepper config from the template + template: src=datagrepper.cfg dest=/etc/datagrepper/datagrepper.cfg owner=fedmsg group=fedmsg mode=640 + tags: + - config + - datagrepper + +- name: setting up credential from the template + template: src=datagrepper-fedmsg.py dest=/etc/fedmsg.d/datagrepper.py owner=fedmsg group=fedmsg mode=640 + tags: + - config + - datagrepper + +- name: installing memcached package for /topics + yum: pkg=python-memcached state=installed + tags: + - packages + - datagrepper + +- name: setting up httpd config + template: src=datagrepper-app.conf dest=/etc/httpd/conf.d/datagrepper.conf owner=root group=root + notify: + - restart httpd + tags: + - config + - datagrepper + +- name: adding fedmsg-hub binary + copy: src=fedmsg-hub dest=/usr/bin/fedmsg-hub owner=root group=root mode=755 + notify: + - restart fedmsg-hub + tags: + - config + - datagrepper + +- name: ensure selinux lets httpd talk to the database + seboolean: 
+ name=httpd_can_network_connect_db persistent=yes state=yes + tags: + - datagrepper + +- name: ensure selinux lets httpd talk to the memcache + seboolean: name=httpd_can_network_memcache persistent=yes state=yes + tags: + - datagrepper + +- name: adding datagrepper dir in httpd context + shell: semanage fcontext -a -t httpd_cache_t "/var/cache/datagrepper(/.*)?" || semanage fcontext -m -t httpd_cache_t "/var/cache/datagrepper(/.*)?" + tags: + - datagrepper + +# selinux policy has been intentionally omitted since that is obtained from fedmsg/base +