diff --git a/roles/distgit/pagure/templates/pagure-sync-bugzilla.py.j2 b/roles/distgit/pagure/templates/pagure-sync-bugzilla.py.j2 index db9f5bc04c..6d83cf5833 100644 --- a/roles/distgit/pagure/templates/pagure-sync-bugzilla.py.j2 +++ b/roles/distgit/pagure/templates/pagure-sync-bugzilla.py.j2 @@ -27,7 +27,7 @@ sync information from the Pagure into bugzilla This short script takes information about package onwership and imports it into bugzilla. ''' - +from __future__ import print_function import re import argparse import datetime @@ -39,6 +39,9 @@ import json import xmlrpclib import codecs import smtplib +import multiprocessing.pool +from math import ceil +from functools import partial try: from email.Message import Message except ImportError: @@ -483,8 +486,10 @@ def _get_watchers_rv_json(pagure_project): print('Querying {0}'.format(watchers_api_url)) watchers_rv = session.get(watchers_api_url, timeout=60) if not watchers_rv.ok: - error_msg = base_error_msg.format( - watchers_api_url, watchers_rv.status_code, watchers_rv.text) + error_msg = ('The connection to "{0}" failed with the status code {1} ' + 'and output "{2}"'.format( + watchers_api_url, watchers_rv.status_code, + watchers_rv.text)) raise RuntimeError(error_msg) return watchers_rv.json() @@ -507,8 +512,10 @@ def _is_retired_in_pdc(product, project): raise RuntimeError("Could not find %r in PDC." % project) branches = pdc_rv.json()['results'] if not branches: - raise RuntimeError("No results for %r in PDC." % project) - return branches[0]['active'] + print("No results for %r in PDC." % project, file=sys.stderr) + # Assume it's not retired if we can't find out for sure + return False + return not branches[0]['active'] @cache.cache_on_arguments() @@ -526,18 +533,100 @@ def _get_override_yaml(project): return {} -def pagure_project_to_acl_schema(pagure_project, product): +@cache.cache_on_arguments() +def _get_package_summary_from_mdapi(namespace, repo, session=None): + summary = None + if namespace != 'rpms': + return summary + + if session is None: + session = retry_session() + + url = '{0}/rawhide/srcpkg/{1}'.format(MDAPIURL.rstrip('/'), repo) + if DRY_RUN: + print('Querying {0}'.format(url)) + + rv = session.get(url, timeout=60) + if rv.ok: + rv_json = rv.json() + summary = rv_json['summary'] + elif not rv.ok and rv.status_code != 404: + error_msg = ('The connection to "{0}" failed with the status code {1} ' + 'and output "{2}"').format(url, rv.status_code, rv.text) + raise RuntimeError(error_msg) + + return summary + + +def _get_pdc_project_name_and_branches(session, namespace, repo): + """ + Gets the branches on a project. This function is used for mapping. + :param namespace: string of the namespace the project is in + :param repo: string of the project + :return: a tuple with the repo name and a list of the repo's branches + """ + branches_url = '{0}component-branches/'.format(PDCURL) + params = dict( + global_component=repo, + type=PDC_TYPES[namespace] + ) + if DRY_RUN: + print('Querying {0} {1}'.format(branches_url, params)) + rv = session.get(branches_url, timeout=60) + + # If the project's branches can't be reported, just return no branches and + # it will be skipped later on + if not rv.ok: + print(('The connection to "{0}" failed with the status code {1} and ' + 'output "{2}"'.format(branches_url, rv.status_code, rv.text)), + file = sys.stderr) + return repo, [] + + data = rv.json() + return repo, [branch['name'] for branch in data['results']] + + +def _get_pagure_projects_from_page(session, namespace, page): + """ + Gets the names of all the Pagure projects on a page. This function is to be + used for mapping. + :param namespace: string of the namespace to query for projects + :param page: int of the page to query at + :return: list of projects on the page + """ + url = ('{0}/api/0/projects?namespace={1}&page={2}&per_page=100&' + 'fork=false'.format( + PAGURE_DIST_GIT_URL.rstrip('/'), namespace, page)) + + if DRY_RUN: + print('- Querying {0}'.format(url)) + + response = session.get(url, timeout=120) + if not bool(response): + print("Failed to talk to %r %r." % ( + response.request.url, response), file=sys.stderr) + raise RuntimeError('Failed to talk to {0} {1}.'.format( + response.request.url, response)) + + return response.json()['projects'] + + +def _pagure_project_to_acl_schema(project_and_product, session=None): """ This function translates the JSON of a Pagure project to what PkgDB used to - output in the Bugzilla API. - :param pagure_project: a dictionary of the JSON of a Pagure project - :return: a dictionary of the content that the Bugzilla API would output + output in the Bugzilla API. This function is used for mapping. + :param project_and_product: a tuple containing the dictionary of the JSON + of a Pagure project and a string of the product (e.g. "Fedora", + "Fedora EPEL") + :param session: a requests session object or None + :return: a dictionary of the content that the PkgDB Bugzilla API would + return """ - session = retry_session() - base_error_msg = ('The connection to "{0}" failed with the status code ' - '{1} and output "{2}"') + project, product = project_and_product + if session is None: + session = retry_session() - watchers_rv_json = _get_watchers_rv_json(pagure_project) + watchers_rv_json = _get_watchers_rv_json(project) user_cc_list = [] for user, watch_levels in watchers_rv_json['watchers'].items(): @@ -545,28 +634,15 @@ def pagure_project_to_acl_schema(pagure_project, product): if 'issues' in watch_levels: user_cc_list.append(user) - summary = None - if pagure_project['namespace'] == 'rpms': - mdapi_url = '{0}/rawhide/srcpkg/{1}'.format( - MDAPIURL.rstrip('/'), pagure_project['name']) - if DRY_RUN: - print('Querying {0}'.format(mdapi_url)) - mdapi_rv = session.get(mdapi_url, timeout=60) - if mdapi_rv.ok: - mdapi_rv_json = mdapi_rv.json() - summary = mdapi_rv_json['summary'] - elif not mdapi_rv.ok and mdapi_rv.status_code != 404: - error_msg = base_error_msg.format( - mdapi_url, mdapi_rv.status_code, mdapi_rv.text) - raise RuntimeError(error_msg) + summary = _get_package_summary_from_mdapi( + project['namespace'], project['name'], session) - # Check if the branch is retired in PDC, and if so set assignee to orphan. - owner = pagure_project['access_users']['owner'][0] + # Check if the project is retired in PDC, and if so set assignee to orphan. + owner = project['access_users']['owner'][0] if _is_retired_in_pdc(product, project): owner = 'extras-orphan@fedoraproject.org' # Check if the Bugzilla ticket assignee has been manually overridden - owner = pagure_project['access_users']['owner'][0] override_yaml = _get_override_yaml(project) if override_yaml.get(product) \ and isinstance(override_yaml[product], string_types): @@ -583,7 +659,11 @@ def pagure_project_to_acl_schema(pagure_project, product): # No package has this set in PkgDB's API, so it can be safely turned # off and set to the defaults later on in the code 'qacontact': None, - 'summary': summary + 'summary': summary, + # These two values are not part of original PkgDB RV, but they are + # useful + 'product': product, + 'project': project['name'] } @@ -610,57 +690,99 @@ if __name__ == '__main__': 'Fedora Container': {}, 'Fedora EPEL': {}, } - pagure_rpms_api_url = ('{0}/api/0/projects?fork=false&namespace=rpms&page=1&' - 'per_page=100'.format( - PAGURE_DIST_GIT_URL.rstrip('/'))) + session = retry_session() + pagure_namespace_to_project_lists = {} + pool = multiprocessing.pool.ThreadPool(8) - while True: + # Query for all the rpm and container projects and store them in + # pagure_namespace_to_projects + for namespace in ['rpms', 'container']: + first_page_url = ('{0}/api/0/projects?namespace={1}&fork=false&page=1' + '&per_page=1'.format(PAGURE_DIST_GIT_URL, namespace)) if DRY_RUN: - print('Querying {0}'.format(pagure_rpms_api_url)) - rv_json = session.get(pagure_rpms_api_url, timeout=120).json() - for project in rv_json['projects']: - pagure_project_branches_api_url = ( - '{0}/api/0/rpms/{1}/git/branches' - .format(PAGURE_DIST_GIT_URL.rstrip('/'), project['name'])) - branch_rv_json = session.get( - pagure_project_branches_api_url, timeout=60).json() - epel = False - fedora = False - for branch in branch_rv_json['branches']: - if re.match(r'epel\d+', branch): - epel = True - projects_dict['Fedora EPEL'][project['name']] = \ - pagure_project_to_acl_schema(project, 'Fedora EPEL') - else: - fedora = True - projects_dict['Fedora'][project['name']] = \ - pagure_project_to_acl_schema(project, 'Fedora') + print('- Querying {0}'.format(first_page_url)) + first_page_rv = session.get(first_page_url, timeout=120) - if fedora and epel: - break + if not bool(first_page_rv): + raise RuntimeError('Failed to talk to {0} {1}.'.format( + first_page_rv.request.url, first_page_rv)) - if rv_json['pagination']['next']: - pagure_rpms_api_url = rv_json['pagination']['next'] - else: - break + total_projects = first_page_rv.json()['total_projects'] + num_pages = int(ceil(total_projects / 100.0)) - pagure_container_api_url = ( - '{0}/api/0/projects?fork=false&namespace=container&page=1&per_page=100' - .format(PAGURE_DIST_GIT_URL)) - while True: - if DRY_RUN: - print('Querying {0}'.format(pagure_container_api_url)) - rv_json = session.get(pagure_container_api_url, timeout=120).json() - for project in rv_json['projects']: - project_pkgdb_schema = pagure_project_to_acl_schema(project) - projects_dict['Fedora Container'][project['name']] = \ - project_pkgdb_schema + # Since we are going to multi-thread, we need to make a partial + # function call so that all the function needs is an iterable to run + p_get_pagure_projects_from_page = partial( + _get_pagure_projects_from_page, session, namespace) + pagure_namespace_to_project_lists[namespace] = pool.map( + p_get_pagure_projects_from_page, range(1, num_pages + 1)) - if rv_json['pagination']['next']: - pagure_container_api_url = rv_json['pagination']['next'] - else: - break + # Flatten the list of lists (each page is a list of a projects) + pagure_namespace_to_projects = {} + for namespace in ['rpms', 'container']: + pagure_namespace_to_projects[namespace] = [] + for project_list in pagure_namespace_to_project_lists[namespace]: + pagure_namespace_to_projects[namespace] += project_list + # This is no longer needed, so we can save some RAM + del pagure_namespace_to_project_lists + + # Now, we must get all the branches for the RPM projects we just queried. + # This will be stored in pagure_rpm_project_branches as a dictionary of + # {'python-requests': 'master', 'f27', 'f26'} + pagure_rpm_project_names = [project['name'] for project in + pagure_namespace_to_projects['rpms']] + p_get_pdc_project_name_and_branches = partial( + _get_pdc_project_name_and_branches, session, 'rpms') + pagure_rpm_project_branches = dict(pool.map( + p_get_pdc_project_name_and_branches, pagure_rpm_project_names)) + # This is no longer needed, so we can save some RAM + del pagure_rpm_project_names + + # Determine what products each project maps to based on its branches. + # pagure_rpms_project_products will be in the format of + # [('python-requests': 'Fedora')...] which will be used my a mapping + # function below + pagure_rpms_project_products = [] + for project in pagure_namespace_to_projects['rpms']: + name = project['name'] + products = [] + branches = pagure_rpm_project_branches[name] + for branch in branches: + if re.match(r'^epel\d+$', branch): + epel = True + products.append('Fedora EPEL') + else: + fedora = True + products.append('Fedora') + + if 'Fedora' in products and 'Fedora EPEL' in products: + break + + for product in products: + pagure_rpms_project_products.append((project, product)) + + for project in pagure_namespace_to_projects['container']: + pagure_rpms_project_products.append((project, 'Fedora Container')) + + # Save some RAM since this large dict is no longer needed + del pagure_namespace_to_projects + + # Now, we must transform the data we collected into something that PkgDB + # would have returned + p_pagure_project_to_acl_schema = partial( + _pagure_project_to_acl_schema, session=session) + project_to_acl_schemas = pool.map( + p_pagure_project_to_acl_schema, pagure_rpms_project_products) + pool.close() + + # Transform the data returned in project_to_acl_schemas to be an orderly + # dictionary for ease of use later on. + for rv in project_to_acl_schemas: + projects_dict[rv['product']][rv['project']] = rv + + # This is no longer needed, so we can save some RAM + del project_to_acl_schemas # Initialize the connection to bugzilla bugzilla = Bugzilla(BZSERVER, BZUSER, BZPASS, projects_dict)