diff --git a/inventory/host_vars/mailman01.phx2.fedoraproject.org b/inventory/host_vars/mailman01.phx2.fedoraproject.org
index 1e45bac384..37f7ac8707 100644
--- a/inventory/host_vars/mailman01.phx2.fedoraproject.org
+++ b/inventory/host_vars/mailman01.phx2.fedoraproject.org
@@ -9,3 +9,8 @@ eth0_ip: 10.5.126.36
 vmhost: virthost17.phx2.fedoraproject.org
 datacenter: phx2
 lvm_size: 750000
+
+# GDPR SAR variables
+sar_script: /srv/webui/bin/hyperkitty-sar.py
+sar_script_user: apache
+sar_output_file: mailinglists.json
diff --git a/inventory/inventory b/inventory/inventory
index c3f4d61ec4..86ff64d061 100644
--- a/inventory/inventory
+++ b/inventory/inventory
@@ -41,6 +41,7 @@ retrace02.qa.fedoraproject.org
 
 [sar]
 bodhi-backend02.phx2.fedoraproject.org
+mailman01.phx2.fedoraproject.org
 
 [certgetter]
 certgetter01.phx2.fedoraproject.org
diff --git a/roles/mailman/files/hyperkitty-sar.py b/roles/mailman/files/hyperkitty-sar.py
new file mode 100644
index 0000000000..29dc9349f5
--- /dev/null
+++ b/roles/mailman/files/hyperkitty-sar.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+"""
+GDPR SAR script for HyperKitty.
+
+Extracts all emails sent from a selected address and prints them as JSON to
+the standard output. The address is read from the GDPR_EMAIL environment
+variable.
+"""
+
+from __future__ import absolute_import, unicode_literals, print_function
+
+import argparse
+import json
+import logging
+import os
+import sys
+
+import requests
+from six.moves.urllib.parse import urljoin
+
+
+ENV_EMAIL = "GDPR_EMAIL"
+HYPERKITTY_INSTANCE = "http://localhost/archives/"
+# Seconds to wait on each HTTP request; without a timeout, requests would
+# block forever if HyperKitty stopped responding.
+REQUEST_TIMEOUT = 300
+
+log = logging.getLogger()
+
+
+def _get_json(url):
+    """Fetch ``url`` and return its decoded JSON body.
+
+    Raises ValueError when the response status is not a success, so that
+    error payloads are never mistaken for archive data.
+    """
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    if response.status_code >= 300:
+        log.error("Could not get URL %s: %d %s",
+                  url, response.status_code, response.reason)
+        raise ValueError(
+            "HTTP {} while fetching {}".format(response.status_code, url))
+    return response.json()
+
+
+def get_emails(address):
+    """Return all the emails sent by ``address`` as a list of dicts.
+
+    The paginated sender listing is walked first to collect the URL of each
+    email, then every email is fetched individually.
+
+    Raises ValueError if a request fails or if the number of collected
+    emails differs from the total announced by the API.
+    """
+    next_url = urljoin(
+        HYPERKITTY_INSTANCE, "api/sender/{}/emails/".format(address))
+    count = None
+    email_urls = []
+    while next_url:
+        page = _get_json(next_url)
+        if count is None:
+            # Keep the total announced by the first page only.
+            count = page["count"]
+        email_urls.extend(e["url"] for e in page["results"])
+        next_url = page.get("next")
+    if count != len(email_urls):
+        log.error("Mismatch in the number of emails: got %s but there are "
+                  "%s in total.", len(email_urls), count)
+        raise ValueError(
+            "Expected {} emails but collected {}".format(
+                count, len(email_urls)))
+    return [_get_json(url) for url in email_urls]
+
+
+def parse_args():
+    """Parse the command line; only the --debug flag is supported."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--debug", action="store_true")
+    return parser.parse_args()
+
+
+def main():
+    """Print the emails of the address found in GDPR_EMAIL as JSON."""
+    args = parse_args()
+    try:
+        email = os.environ[ENV_EMAIL]
+    except KeyError as e:
+        print("Missing environment variable. {}".format(e), file=sys.stderr)
+        sys.exit(1)
+    logging.basicConfig(
+        level=logging.DEBUG if args.debug else logging.WARNING,
+        stream=sys.stderr,
+    )
+    emails = get_emails(email)
+    print(json.dumps(dict(
+        emails=emails, count=len(emails),
+    ), indent=2))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/roles/mailman/tasks/main.yml b/roles/mailman/tasks/main.yml
index 882ed98327..7b1bdd0456 100644
--- a/roles/mailman/tasks/main.yml
+++ b/roles/mailman/tasks/main.yml
@@ -427,6 +427,7 @@
   - post-update.sh
   - import-mm2.py
   - periodic.py
+  - hyperkitty-sar.py
 
 - name: install the templatized scripts
   template: src={{ item }}.j2 dest="{{ mailman_webui_basedir }}/bin/{{ item }}"