diff --git a/inventory/host_vars/db-datanommer02.phx2.fedoraproject.org b/inventory/host_vars/db-datanommer02.phx2.fedoraproject.org
index 93a01783bc..64b977e9e9 100644
--- a/inventory/host_vars/db-datanommer02.phx2.fedoraproject.org
+++ b/inventory/host_vars/db-datanommer02.phx2.fedoraproject.org
@@ -30,3 +30,8 @@ kernel_shmmax: 68719476736
 db_backup_dir: ['/backups']
 shared_buffers: "4GB"
 effective_cache_size: "12GB"
+
+# GDPR SAR variables - datanommer/datagrepper
+sar_script: /usr/local/bin/datagrepper_sar.py
+sar_script_user: root
+sar_output_file: datagrepper.csv
diff --git a/inventory/host_vars/pgbdr01.stg.phx2.fedoraproject.org b/inventory/host_vars/pgbdr01.stg.phx2.fedoraproject.org
index 1a25996d9e..f01975aeb4 100644
--- a/inventory/host_vars/pgbdr01.stg.phx2.fedoraproject.org
+++ b/inventory/host_vars/pgbdr01.stg.phx2.fedoraproject.org
@@ -47,3 +47,9 @@ keepalived_interface: eth0
 keepalived_priority: 100
 keepalived_ipaddress: 10.5.128.171/24
 keepalived_routerid: 19
+
+
+# GDPR SAR variables - datanommer/datagrepper
+sar_script: /usr/local/bin/datagrepper_sar.py
+sar_script_user: root
+sar_output_file: datagrepper.csv
diff --git a/roles/postgresql_server/files/datagrepper_sar.py b/roles/postgresql_server/files/datagrepper_sar.py
new file mode 100644
index 0000000000..6e7e34919a
--- /dev/null
+++ b/roles/postgresql_server/files/datagrepper_sar.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+
+from __future__ import unicode_literals, print_function
+
+import os
+import random
+import string
+import subprocess
+import sys
+import tempfile
+
+
+# Every message that references the user, either through the user_messages
+# association table or through the username column on the message itself.
+# :'username' is a psql variable reference: psql substitutes the value as a
+# correctly quoted SQL literal, so the username is never spliced into the
+# statement by hand. COPY ... TO STDOUT streams the CSV through psql rather
+# than having the server write a file under /tmp.
+QUERY = '''
+COPY (
+  SELECT DISTINCT messages FROM messages WHERE
+  messages.id IN (
+    SELECT messages.id
+    FROM messages, user_messages
+    WHERE messages.id = user_messages.msg
+    AND user_messages.username = :'username'
+    UNION
+    SELECT messages.id
+    FROM messages
+    WHERE messages.username = :'username'
+  )
+)
+TO STDOUT delimiter ',' CSV header;
+'''
+
+
+def main():
+    ''' Prints out all the datagrepper messages related to the username
+    specified in the SAR_USERNAME environment variable, as CSV on stdout.
+    If no such environment variable is available, the script will bail.
+
+    Returns 1 when SAR_USERNAME is unset or empty, otherwise the exit
+    status of psql (0 on success).
+    '''
+
+    username = os.getenv('SAR_USERNAME')
+    if not username:
+        print('An username is required to query datagrepper')
+        return 1
+
+    # Argument list, no shell: the username reaches psql as a single argv
+    # entry whatever characters it contains.
+    command = [
+        'sudo', '-u', 'postgres',
+        'psql', '-X', '-q',
+        '-v', 'ON_ERROR_STOP=1',
+        '-v', 'username={0}'.format(username),
+        'datanommer',
+    ]
+
+    # The statement is fed on stdin because psql does not interpolate
+    # variables in commands given with -c. psql inherits our stdout, so the
+    # CSV is streamed directly instead of being buffered in memory.
+    process = subprocess.Popen(command, stdin=subprocess.PIPE)
+    process.communicate(QUERY.encode('utf-8'))
+    return process.returncode
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/roles/postgresql_server/tasks/main.yml b/roles/postgresql_server/tasks/main.yml
index ec3d2d7681..9dd6cca83a 100644
--- a/roles/postgresql_server/tasks/main.yml
+++ b/roles/postgresql_server/tasks/main.yml
@@ -140,3 +140,13 @@
   tags:
   - cron
   - postgresql
+
+- name: Set up datanommer/datagrepper SAR script
+  copy: >
+    src=datagrepper_sar.py
+    dest=/usr/local/bin/datagrepper_sar.py mode=0700
+  when: inventory_hostname.startswith(('db-datanommer02', 'pgbdr01.stg'))
+  tags:
+  - postgresql
+  - SAR
+  - GBDR