Use multithread on Pagure and PDC queries
This commit is contained in:
parent
11ace40beb
commit
27a90039fe
1 changed files with 197 additions and 75 deletions
|
@ -27,7 +27,7 @@ sync information from the Pagure into bugzilla
|
||||||
This short script takes information about package ownership and imports it
|
This short script takes information about package ownership and imports it
|
||||||
into bugzilla.
|
into bugzilla.
|
||||||
'''
|
'''
|
||||||
|
from __future__ import print_function
|
||||||
import re
|
import re
|
||||||
import argparse
|
import argparse
|
||||||
import datetime
|
import datetime
|
||||||
|
@ -39,6 +39,9 @@ import json
|
||||||
import xmlrpclib
|
import xmlrpclib
|
||||||
import codecs
|
import codecs
|
||||||
import smtplib
|
import smtplib
|
||||||
|
import multiprocessing.pool
|
||||||
|
from math import ceil
|
||||||
|
from functools import partial
|
||||||
try:
|
try:
|
||||||
from email.Message import Message
|
from email.Message import Message
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -483,8 +486,10 @@ def _get_watchers_rv_json(pagure_project):
|
||||||
print('Querying {0}'.format(watchers_api_url))
|
print('Querying {0}'.format(watchers_api_url))
|
||||||
watchers_rv = session.get(watchers_api_url, timeout=60)
|
watchers_rv = session.get(watchers_api_url, timeout=60)
|
||||||
if not watchers_rv.ok:
|
if not watchers_rv.ok:
|
||||||
error_msg = base_error_msg.format(
|
error_msg = ('The connection to "{0}" failed with the status code {1} '
|
||||||
watchers_api_url, watchers_rv.status_code, watchers_rv.text)
|
'and output "{2}"'.format(
|
||||||
|
watchers_api_url, watchers_rv.status_code,
|
||||||
|
watchers_rv.text))
|
||||||
raise RuntimeError(error_msg)
|
raise RuntimeError(error_msg)
|
||||||
return watchers_rv.json()
|
return watchers_rv.json()
|
||||||
|
|
||||||
|
@ -507,8 +512,10 @@ def _is_retired_in_pdc(product, project):
|
||||||
raise RuntimeError("Could not find %r in PDC." % project)
|
raise RuntimeError("Could not find %r in PDC." % project)
|
||||||
branches = pdc_rv.json()['results']
|
branches = pdc_rv.json()['results']
|
||||||
if not branches:
|
if not branches:
|
||||||
raise RuntimeError("No results for %r in PDC." % project)
|
print("No results for %r in PDC." % project, file=sys.stderr)
|
||||||
return branches[0]['active']
|
# Assume it's not retired if we can't find out for sure
|
||||||
|
return False
|
||||||
|
return not branches[0]['active']
|
||||||
|
|
||||||
|
|
||||||
@cache.cache_on_arguments()
|
@cache.cache_on_arguments()
|
||||||
|
@ -526,18 +533,100 @@ def _get_override_yaml(project):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def pagure_project_to_acl_schema(pagure_project, product):
|
@cache.cache_on_arguments()
def _get_package_summary_from_mdapi(namespace, repo, session=None):
    """
    Look up the summary of a source package in mdapi's rawhide data.
    :param namespace: string of the namespace the project is in; anything
        other than "rpms" is not looked up at all
    :param repo: string of the project (used as the source package name)
    :param session: a requests session object or None, in which case a new
        retry session is created
    :return: the summary string, or None when the namespace is not "rpms" or
        mdapi answers with a 404
    :raises RuntimeError: when mdapi answers with any other error status
    """
    # NOTE(review): the session object is one of the arguments of this cached
    # call -- confirm the cache key generator copes with that as intended.
    if namespace != 'rpms':
        return None

    http = retry_session() if session is None else session

    mdapi_url = '{0}/rawhide/srcpkg/{1}'.format(MDAPIURL.rstrip('/'), repo)
    if DRY_RUN:
        print('Querying {0}'.format(mdapi_url))

    response = http.get(mdapi_url, timeout=60)
    if response.ok:
        return response.json()['summary']

    # A 404 is not treated as an error: the package simply has no summary
    if response.status_code == 404:
        return None

    raise RuntimeError(
        'The connection to "{0}" failed with the status code {1} '
        'and output "{2}"'.format(
            mdapi_url, response.status_code, response.text))
|
||||||
|
|
||||||
|
|
||||||
|
def _get_pdc_project_name_and_branches(session, namespace, repo):
    """
    Gets the branches on a project. This function is used for mapping.
    :param session: a requests session object used to query PDC
    :param namespace: string of the namespace the project is in; it must be a
        key of PDC_TYPES
    :param repo: string of the project
    :return: a tuple with the repo name and a list of the repo's branches; the
        list is empty when PDC answers with an error status
    """
    branches_url = '{0}component-branches/'.format(PDCURL)
    params = dict(
        global_component=repo,
        type=PDC_TYPES[namespace]
    )
    if DRY_RUN:
        print('Querying {0} {1}'.format(branches_url, params))
    # The filters have to be sent along with the request; without them the
    # query is not restricted to this project's branches.
    rv = session.get(branches_url, params=params, timeout=60)

    # If the project's branches can't be reported, just return no branches and
    # it will be skipped later on
    if not rv.ok:
        print(('The connection to "{0}" failed with the status code {1} and '
               'output "{2}"'.format(branches_url, rv.status_code, rv.text)),
              file=sys.stderr)
        return repo, []

    data = rv.json()
    return repo, [branch['name'] for branch in data['results']]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_pagure_projects_from_page(session, namespace, page):
    """
    Fetch a single page (100 entries per page) of the non-fork Pagure projects
    in a namespace. This function is to be used for mapping.
    :param session: a requests session object used to query Pagure
    :param namespace: string of the namespace to query for projects
    :param page: int of the page to query at
    :return: list of projects on the page
    :raises RuntimeError: when the response is not successful
    """
    base_url = PAGURE_DIST_GIT_URL.rstrip('/')
    page_url = (
        '{0}/api/0/projects?namespace={1}&page={2}&per_page=100&fork=false'
    ).format(base_url, namespace, page)

    if DRY_RUN:
        print('- Querying {0}'.format(page_url))

    rv = session.get(page_url, timeout=120)
    # The response's truth value tells whether the request succeeded
    if rv:
        return rv.json()['projects']

    # Report the failure on stderr as well as raising
    requested_url = rv.request.url
    print("Failed to talk to %r %r." % (requested_url, rv), file=sys.stderr)
    raise RuntimeError(
        'Failed to talk to {0} {1}.'.format(requested_url, rv))
|
||||||
|
|
||||||
|
|
||||||
|
def _pagure_project_to_acl_schema(project_and_product, session=None):
|
||||||
"""
|
"""
|
||||||
This function translates the JSON of a Pagure project to what PkgDB used to
|
This function translates the JSON of a Pagure project to what PkgDB used to
|
||||||
output in the Bugzilla API.
|
output in the Bugzilla API. This function is used for mapping.
|
||||||
:param pagure_project: a dictionary of the JSON of a Pagure project
|
:param project_and_product: a tuple containing the dictionary of the JSON
|
||||||
:return: a dictionary of the content that the Bugzilla API would output
|
of a Pagure project and a string of the product (e.g. "Fedora",
|
||||||
|
"Fedora EPEL")
|
||||||
|
:param session: a requests session object or None
|
||||||
|
:return: a dictionary of the content that the PkgDB Bugzilla API would
|
||||||
|
return
|
||||||
"""
|
"""
|
||||||
|
project, product = project_and_product
|
||||||
|
if session is None:
|
||||||
session = retry_session()
|
session = retry_session()
|
||||||
base_error_msg = ('The connection to "{0}" failed with the status code '
|
|
||||||
'{1} and output "{2}"')
|
|
||||||
|
|
||||||
watchers_rv_json = _get_watchers_rv_json(pagure_project)
|
watchers_rv_json = _get_watchers_rv_json(project)
|
||||||
|
|
||||||
user_cc_list = []
|
user_cc_list = []
|
||||||
for user, watch_levels in watchers_rv_json['watchers'].items():
|
for user, watch_levels in watchers_rv_json['watchers'].items():
|
||||||
|
@ -545,28 +634,15 @@ def pagure_project_to_acl_schema(pagure_project, product):
|
||||||
if 'issues' in watch_levels:
|
if 'issues' in watch_levels:
|
||||||
user_cc_list.append(user)
|
user_cc_list.append(user)
|
||||||
|
|
||||||
summary = None
|
summary = _get_package_summary_from_mdapi(
|
||||||
if pagure_project['namespace'] == 'rpms':
|
project['namespace'], project['name'], session)
|
||||||
mdapi_url = '{0}/rawhide/srcpkg/{1}'.format(
|
|
||||||
MDAPIURL.rstrip('/'), pagure_project['name'])
|
|
||||||
if DRY_RUN:
|
|
||||||
print('Querying {0}'.format(mdapi_url))
|
|
||||||
mdapi_rv = session.get(mdapi_url, timeout=60)
|
|
||||||
if mdapi_rv.ok:
|
|
||||||
mdapi_rv_json = mdapi_rv.json()
|
|
||||||
summary = mdapi_rv_json['summary']
|
|
||||||
elif not mdapi_rv.ok and mdapi_rv.status_code != 404:
|
|
||||||
error_msg = base_error_msg.format(
|
|
||||||
mdapi_url, mdapi_rv.status_code, mdapi_rv.text)
|
|
||||||
raise RuntimeError(error_msg)
|
|
||||||
|
|
||||||
# Check if the branch is retired in PDC, and if so set assignee to orphan.
|
# Check if the project is retired in PDC, and if so set assignee to orphan.
|
||||||
owner = pagure_project['access_users']['owner'][0]
|
owner = project['access_users']['owner'][0]
|
||||||
if _is_retired_in_pdc(product, project):
|
if _is_retired_in_pdc(product, project):
|
||||||
owner = 'extras-orphan@fedoraproject.org'
|
owner = 'extras-orphan@fedoraproject.org'
|
||||||
|
|
||||||
# Check if the Bugzilla ticket assignee has been manually overridden
|
# Check if the Bugzilla ticket assignee has been manually overridden
|
||||||
owner = pagure_project['access_users']['owner'][0]
|
|
||||||
override_yaml = _get_override_yaml(project)
|
override_yaml = _get_override_yaml(project)
|
||||||
if override_yaml.get(product) \
|
if override_yaml.get(product) \
|
||||||
and isinstance(override_yaml[product], string_types):
|
and isinstance(override_yaml[product], string_types):
|
||||||
|
@ -583,7 +659,11 @@ def pagure_project_to_acl_schema(pagure_project, product):
|
||||||
# No package has this set in PkgDB's API, so it can be safely turned
|
# No package has this set in PkgDB's API, so it can be safely turned
|
||||||
# off and set to the defaults later on in the code
|
# off and set to the defaults later on in the code
|
||||||
'qacontact': None,
|
'qacontact': None,
|
||||||
'summary': summary
|
'summary': summary,
|
||||||
|
# These two values are not part of original PkgDB RV, but they are
|
||||||
|
# useful
|
||||||
|
'product': product,
|
||||||
|
'project': project['name']
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -610,57 +690,99 @@ if __name__ == '__main__':
|
||||||
'Fedora Container': {},
|
'Fedora Container': {},
|
||||||
'Fedora EPEL': {},
|
'Fedora EPEL': {},
|
||||||
}
|
}
|
||||||
pagure_rpms_api_url = ('{0}/api/0/projects?fork=false&namespace=rpms&page=1&'
|
|
||||||
'per_page=100'.format(
|
|
||||||
PAGURE_DIST_GIT_URL.rstrip('/')))
|
|
||||||
session = retry_session()
|
|
||||||
|
|
||||||
while True:
|
session = retry_session()
|
||||||
|
pagure_namespace_to_project_lists = {}
|
||||||
|
pool = multiprocessing.pool.ThreadPool(8)
|
||||||
|
|
||||||
|
# Query for all the rpm and container projects and store them in
|
||||||
|
# pagure_namespace_to_projects
|
||||||
|
for namespace in ['rpms', 'container']:
|
||||||
|
first_page_url = ('{0}/api/0/projects?namespace={1}&fork=false&page=1'
|
||||||
|
'&per_page=1'.format(PAGURE_DIST_GIT_URL, namespace))
|
||||||
if DRY_RUN:
|
if DRY_RUN:
|
||||||
print('Querying {0}'.format(pagure_rpms_api_url))
|
print('- Querying {0}'.format(first_page_url))
|
||||||
rv_json = session.get(pagure_rpms_api_url, timeout=120).json()
|
first_page_rv = session.get(first_page_url, timeout=120)
|
||||||
for project in rv_json['projects']:
|
|
||||||
pagure_project_branches_api_url = (
|
if not bool(first_page_rv):
|
||||||
'{0}/api/0/rpms/{1}/git/branches'
|
raise RuntimeError('Failed to talk to {0} {1}.'.format(
|
||||||
.format(PAGURE_DIST_GIT_URL.rstrip('/'), project['name']))
|
first_page_rv.request.url, first_page_rv))
|
||||||
branch_rv_json = session.get(
|
|
||||||
pagure_project_branches_api_url, timeout=60).json()
|
total_projects = first_page_rv.json()['total_projects']
|
||||||
epel = False
|
num_pages = int(ceil(total_projects / 100.0))
|
||||||
fedora = False
|
|
||||||
for branch in branch_rv_json['branches']:
|
# Since we are going to multi-thread, we need to make a partial
|
||||||
if re.match(r'epel\d+', branch):
|
# function call so that all the function needs is an iterable to run
|
||||||
|
p_get_pagure_projects_from_page = partial(
|
||||||
|
_get_pagure_projects_from_page, session, namespace)
|
||||||
|
pagure_namespace_to_project_lists[namespace] = pool.map(
|
||||||
|
p_get_pagure_projects_from_page, range(1, num_pages + 1))
|
||||||
|
|
||||||
|
# Flatten the list of lists (each page is a list of projects)
|
||||||
|
pagure_namespace_to_projects = {}
|
||||||
|
for namespace in ['rpms', 'container']:
|
||||||
|
pagure_namespace_to_projects[namespace] = []
|
||||||
|
for project_list in pagure_namespace_to_project_lists[namespace]:
|
||||||
|
pagure_namespace_to_projects[namespace] += project_list
|
||||||
|
# This is no longer needed, so we can save some RAM
|
||||||
|
del pagure_namespace_to_project_lists
|
||||||
|
|
||||||
|
# Now, we must get all the branches for the RPM projects we just queried.
|
||||||
|
# This will be stored in pagure_rpm_project_branches as a dictionary of
|
||||||
|
# {'python-requests': ['master', 'f27', 'f26']}
|
||||||
|
pagure_rpm_project_names = [project['name'] for project in
|
||||||
|
pagure_namespace_to_projects['rpms']]
|
||||||
|
p_get_pdc_project_name_and_branches = partial(
|
||||||
|
_get_pdc_project_name_and_branches, session, 'rpms')
|
||||||
|
pagure_rpm_project_branches = dict(pool.map(
|
||||||
|
p_get_pdc_project_name_and_branches, pagure_rpm_project_names))
|
||||||
|
# This is no longer needed, so we can save some RAM
|
||||||
|
del pagure_rpm_project_names
|
||||||
|
|
||||||
|
# Determine what products each project maps to based on its branches.
|
||||||
|
# pagure_rpms_project_products will be in the format of
|
||||||
|
# [(project, 'Fedora'), ...] which will be used by a mapping
|
||||||
|
# function below
|
||||||
|
pagure_rpms_project_products = []
|
||||||
|
for project in pagure_namespace_to_projects['rpms']:
|
||||||
|
name = project['name']
|
||||||
|
products = []
|
||||||
|
branches = pagure_rpm_project_branches[name]
|
||||||
|
for branch in branches:
|
||||||
|
if re.match(r'^epel\d+$', branch):
|
||||||
epel = True
|
epel = True
|
||||||
projects_dict['Fedora EPEL'][project['name']] = \
|
products.append('Fedora EPEL')
|
||||||
pagure_project_to_acl_schema(project, 'Fedora EPEL')
|
|
||||||
else:
|
else:
|
||||||
fedora = True
|
fedora = True
|
||||||
projects_dict['Fedora'][project['name']] = \
|
products.append('Fedora')
|
||||||
pagure_project_to_acl_schema(project, 'Fedora')
|
|
||||||
|
|
||||||
if fedora and epel:
|
if 'Fedora' in products and 'Fedora EPEL' in products:
|
||||||
break
|
break
|
||||||
|
|
||||||
if rv_json['pagination']['next']:
|
for product in products:
|
||||||
pagure_rpms_api_url = rv_json['pagination']['next']
|
pagure_rpms_project_products.append((project, product))
|
||||||
else:
|
|
||||||
break
|
|
||||||
|
|
||||||
pagure_container_api_url = (
|
for project in pagure_namespace_to_projects['container']:
|
||||||
'{0}/api/0/projects?fork=false&namespace=container&page=1&per_page=100'
|
pagure_rpms_project_products.append((project, 'Fedora Container'))
|
||||||
.format(PAGURE_DIST_GIT_URL))
|
|
||||||
while True:
|
|
||||||
if DRY_RUN:
|
|
||||||
print('Querying {0}'.format(pagure_container_api_url))
|
|
||||||
rv_json = session.get(pagure_container_api_url, timeout=120).json()
|
|
||||||
for project in rv_json['projects']:
|
|
||||||
project_pkgdb_schema = pagure_project_to_acl_schema(project)
|
|
||||||
projects_dict['Fedora Container'][project['name']] = \
|
|
||||||
project_pkgdb_schema
|
|
||||||
|
|
||||||
if rv_json['pagination']['next']:
|
# Save some RAM since this large dict is no longer needed
|
||||||
pagure_container_api_url = rv_json['pagination']['next']
|
del pagure_namespace_to_projects
|
||||||
else:
|
|
||||||
break
|
# Now, we must transform the data we collected into something that PkgDB
|
||||||
|
# would have returned
|
||||||
|
p_pagure_project_to_acl_schema = partial(
|
||||||
|
_pagure_project_to_acl_schema, session=session)
|
||||||
|
project_to_acl_schemas = pool.map(
|
||||||
|
p_pagure_project_to_acl_schema, pagure_rpms_project_products)
|
||||||
|
pool.close()
|
||||||
|
|
||||||
|
# Transform the data returned in project_to_acl_schemas to be an orderly
|
||||||
|
# dictionary for ease of use later on.
|
||||||
|
for rv in project_to_acl_schemas:
|
||||||
|
projects_dict[rv['product']][rv['project']] = rv
|
||||||
|
|
||||||
|
# This is no longer needed, so we can save some RAM
|
||||||
|
del project_to_acl_schemas
|
||||||
|
|
||||||
# Initialize the connection to bugzilla
|
# Initialize the connection to bugzilla
|
||||||
bugzilla = Bugzilla(BZSERVER, BZUSER, BZPASS, projects_dict)
|
bugzilla = Bugzilla(BZSERVER, BZUSER, BZPASS, projects_dict)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue