Integrate staging pagure-sync-bugzilla changes into prod.

Ralph Bean 2017-12-15 02:23:09 +00:00
parent 7c83acbcd0
commit e0534e11b0


@@ -20,11 +20,12 @@
# Author(s): Mike Watters <valholla75@fedoraproject.org>
# Author(s): Pierre-Yves Chibon <pingou@pingoured.fr>
# Author(s): Matt Prahl <mprahl@redhat.com>
# Author(s): Ralph Bean <rbean@redhat.com>
#
'''
sync information from Pagure into bugzilla
This short script takes information about package ownership and imports it
This ... script takes information about package ownership and imports it
into bugzilla.
'''
from __future__ import print_function
@@ -41,13 +42,12 @@ import codecs
import smtplib
import traceback
import multiprocessing.pool
from math import ceil
try:
from email.Message import Message
except ImportError:
from email.message import EmailMessage as Message
import bugzilla
import bugzilla as bugzilla_lib
import dogpile.cache
import requests
import yaml
@@ -60,8 +60,8 @@ from requests.packages.urllib3.util.retry import Retry
cache = dogpile.cache.make_region().configure(
'dogpile.cache.memory',
expiration_time=3600,
)
@@ -79,7 +79,13 @@ def retry_session():
session.mount('https://', adapter)
return session
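Only the tail of retry_session() falls inside this hunk. For orientation, a minimal sketch of such a helper, assuming the usual requests/urllib3 pattern implied by the Retry import above; the retry counts and backoff value are illustrative, not taken from this file:

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


def retry_session():
    # Build a Session that retries failed HTTPS requests a few times,
    # with exponential backoff, before giving up.
    session = requests.Session()
    retry = Retry(total=5, connect=3, read=3, backoff_factor=1,
                  status_forcelist=(500, 502, 503, 504))
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('https://', adapter)
    return session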
{% if env == 'staging' %}
BZSERVER = 'https://partner-bugzilla.redhat.com'
{% else %}
BZSERVER = 'https://bugzilla.redhat.com'
{% endif %}
BZUSER = '{{ bugzilla_user }}'
BZPASS = '{{ bugzilla_password }}'
BZCOMPAPI = 'component.get'
@@ -92,7 +98,7 @@ NOTIFYEMAIL = [
'ralph@fedoraproject.org',
'mprahl@fedoraproject.org',
]
DRY_RUN = False
DEBUG = False
{% if env == 'staging' %}
FASURL = 'https://admin.stg.fedoraproject.org/accounts'
@@ -117,9 +123,16 @@ DATA_CACHE = '/var/tmp/pagure_sync_bz.json'
PRODUCTS = {
'Fedora': 'Fedora',
'Fedora Container': 'Fedora Container Images',
'Fedora Modules': 'Fedora Modules',
'Fedora EPEL': 'Fedora EPEL',
}
NAMESPACE_TO_PRODUCT = {
'rpms': 'Fedora', # except EPEL...
'container': 'Fedora Container',
'modules': 'Fedora Modules',
}
# This maps bugzilla products to "lead" branches in PDC. If the lead branch is
# retired, then we in turn set the default assignee to "orphan" for all new bugs
# in the given product.
@@ -128,6 +141,8 @@ PRODUCTS_TO_LEAD_BRANCH = {
'Fedora': 'master',
# Same for containers.
'Fedora Container': 'master',
# Same for modules.
'Fedora Modules': 'master',
# If epel7 is retired, then all new epel bugs go to orphan.
'Fedora EPEL': 'epel7',
}
@@ -136,6 +151,7 @@ PDC_TYPES = {
'modules': 'module',
'container': 'container',
}
INVERSE_PDC_TYPES = dict([(v, k) for k, v in PDC_TYPES.items()])
# When querying for current info, take segments of 1000 packages a time
@@ -199,7 +215,7 @@ def resilient_partial(fn, *initial, **kwargs):
try:
full = initial + additional
return fn(*full, **kwargs)
except Exception as e:
except Exception:
traceback.print_exc()
return None
wrapper.__name__ = fn.__name__
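Only part of resilient_partial() is visible, but the intent is clear from these lines: it wraps fn so that any exception is printed and swallowed, with None returned instead of propagating. A self-contained sketch of that pattern with a toy usage example; the closure boilerplate outside this hunk and the divide() helper are assumptions:

import traceback


def resilient_partial(fn, *initial, **kwargs):
    """Like functools.partial, but the wrapped call returns None on error."""
    def wrapper(*additional):
        try:
            full = initial + additional
            return fn(*full, **kwargs)
        except Exception:
            traceback.print_exc()
            return None
    wrapper.__name__ = fn.__name__
    return wrapper


def divide(numerator, denominator):
    return numerator / denominator


safe_divide = resilient_partial(divide, 10)
print(safe_divide(2))  # 5 (5.0 on Python 3)
print(safe_divide(0))  # prints a traceback, returns None; callers filter these out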
@@ -243,8 +259,8 @@ class ProductCache(dict):
pkglist = projects_dict[key].keys()
products = {}
for pkg_segment in segment(pkglist, BZ_PKG_SEGMENT):
# Format that bugzilla will understand. Strip None's that segment() pads
# out the final data segment() with
# Format that bugzilla will understand. Strip None's that
# segment() pads out the final data segment() with
query = [
dict(product=PRODUCTS[key], component=p)
for p in pkg_segment if p is not None
@@ -253,24 +269,25 @@
for package in raw_data['components']:
# Reformat data to be the same as what's returned from
# getcomponentsdetails
product = dict(initialowner=package['default_assignee'],
description=package['description'],
initialqacontact=package['default_qa_contact'],
initialcclist=package['default_cc'])
product = dict(
initialowner=package['default_assignee'],
description=package['description'],
initialqacontact=package['default_qa_contact'],
initialcclist=package['default_cc'])
products[package['name'].lower()] = product
self[key] = products
return super(ProductCache, self).__getitem__(key)
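The segment() helper called in __getitem__ above is defined outside this diff; the "Strip None's" comment only makes sense if it pads the last chunk. A minimal sketch of such a helper, assuming that padding behavior (the name and call shape match the usage above, the body is an assumption):

def segment(iterable, chunk, fill=None):
    """Yield lists of length `chunk`, padding the final one with `fill`."""
    items = list(iterable)
    for start in range(0, len(items), chunk):
        piece = items[start:start + chunk]
        # Pad the short final piece so every segment has the same length;
        # this is why callers filter out the trailing None entries.
        piece += [fill] * (chunk - len(piece))
        yield piece


# e.g. list(segment(['a', 'b', 'c'], 2)) == [['a', 'b'], ['c', None]]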
class Bugzilla(object):
class BugzillaProxy(object):
def __init__(self, bzServer, username, password, acls):
self.bzXmlRpcServer = bzServer
self.username = username
self.password = password
self.server = bugzilla.Bugzilla(
self.server = bugzilla_lib.Bugzilla(
url=self.bzXmlRpcServer,
user=self.username,
password=self.password)
@@ -315,7 +332,7 @@ class Bugzilla(object):
return self.userCache[username]['bugzilla_email'].lower()
def add_edit_component(self, package, collection, owner, description=None,
qacontact=None, cclist=None):
'''Add or update a component to have the values specified.
'''
# Turn the cclist into something usable by bugzilla
@@ -376,14 +393,14 @@ class Bugzilla(object):
break
if data:
### FIXME: initialowner has been made mandatory for some
# FIXME: initialowner has been made mandatory for some
# reason. Asking dkl why.
data['initialowner'] = owner
# Changes occurred. Submit a request to change via xmlrpc
data['product'] = PRODUCTS[collection]
data['component'] = package
if DRY_RUN:
if DEBUG:
print('[EDITCOMP] Changing via editComponent('
'%s, %s, "xxxxx")' % (data, self.username))
print('[EDITCOMP] Former values: %s|%s|%s|%s' % (
@@ -418,7 +435,7 @@ class Bugzilla(object):
if initialCCList:
data['initialcclist'] = initialCCList
if DRY_RUN:
if DEBUG:
print('[ADDCOMP] Adding new component AddComponent:('
'%s, %s, "xxxxx")' % (data, self.username))
else:
@@ -435,6 +452,10 @@ def send_email(fromAddress, toAddress, subject, message, ccAddress=None):
This will be replaced by sending messages to a log later.
'''
{% if env == 'staging' %}
# Send no email in staging...
pass
{% else %}
msg = Message()
msg.add_header('To', ','.join(toAddress))
msg.add_header('From', fromAddress)
@@ -446,6 +467,7 @@ def send_email(fromAddress, toAddress, subject, message, ccAddress=None):
smtp = smtplib.SMTP('bastion')
smtp.sendmail(fromAddress, toAddress, msg.as_string())
smtp.quit()
{% endif %}
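The env switch above renders send_email() down to a no-op on staging while production keeps the SMTP path. A small illustration of that rendering, assuming plain jinja2 (the real file is rendered by Ansible's template module):

import jinja2

snippet = (
    "{% if env == 'staging' %}\n"
    "pass  # Send no email in staging...\n"
    "{% else %}\n"
    "smtp = smtplib.SMTP('bastion')\n"
    "{% endif %}\n"
)
print(jinja2.Template(snippet).render(env='staging'))     # keeps only the pass branch
print(jinja2.Template(snippet).render(env='production'))  # keeps only the smtplib branch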
def notify_users(errors):
@@ -506,57 +528,13 @@ def notify_users(errors):
json.dump(new_data, stream)
@cache.cache_on_arguments()
def _get_watchers_rv_json(pagure_project):
watchers_api_url = '{0}/api/0/{1}/{2}/watchers'.format(
PAGURE_DIST_GIT_URL.rstrip('/'), pagure_project['namespace'],
pagure_project['name'])
if DRY_RUN:
print('Querying {0}'.format(watchers_api_url))
watchers_rv = session.get(watchers_api_url, timeout=60)
if not watchers_rv.ok:
error_msg = ('The connection to "{0}" failed with the status code {1} '
'and output "{2}"'.format(
watchers_api_url, watchers_rv.status_code,
watchers_rv.text))
raise RuntimeError(error_msg)
return watchers_rv.json()
@cache.cache_on_arguments()
def _is_retired_in_pdc(product, project):
lead = PRODUCTS_TO_LEAD_BRANCH[product]
type = PDC_TYPES[project['namespace']]
name = project['name']
pdc_url = '{0}/component-branches/'.format(PDCURL.rstrip('/'))
params = dict(
global_component=name,
type=type,
name=lead,
)
if DRY_RUN:
print('Querying {0} {1}'.format(pdc_url, params))
pdc_rv = session.get(pdc_url, params=params, timeout=30)
if not pdc_rv.ok:
raise RuntimeError("Could not find %r in PDC." % project)
branches = pdc_rv.json()['results']
if not branches:
if DRY_RUN:
print("No results for %s in PDC." % pdc_rv.request.url)
# Default to "not retired" if we have no explicit entry. This is the
# case for an 'el6' branch of a package which has no el6 branch. It
# isn't technically retired, because it never really existed!
return False
return not branches[0]['active']
@cache.cache_on_arguments()
def _get_override_yaml(project):
pagure_override_url = '{0}/{1}/raw/master/f/{2}/{3}'.format(
PAGUREURL.rstrip('/'), BUGZILLA_OVERRIDE_REPO, project['namespace'],
project['name'])
if DRY_RUN:
if DEBUG:
print('Querying {0}'.format(pagure_override_url))
override_rv = session.get(pagure_override_url, timeout=30)
if override_rv.status_code == 200:
@@ -575,7 +553,7 @@ def _get_package_summary_from_mdapi(namespace, repo, session=None):
session = retry_session()
url = '{0}/rawhide/srcpkg/{1}'.format(MDAPIURL.rstrip('/'), repo)
if DRY_RUN:
if DEBUG:
print('Querying {0}'.format(url))
rv = session.get(url, timeout=60)
@@ -590,19 +568,18 @@ def _get_package_summary_from_mdapi(namespace, repo, session=None):
return summary
def _get_pdc_project_name_and_branches(session, namespace, repo):
def _get_pdc_branches(session, repo):
"""
Gets the branches on a project. This function is used for mapping.
:param namespace: string of the namespace the project is in
:param repo: string of the project
:return: a tuple with the repo name and a list of the repo's branches
:param repo: the project dict
:return: a list of the repo's branches
"""
branches_url = '{0}component-branches/'.format(PDCURL)
params = dict(
global_component=repo,
type=PDC_TYPES[namespace]
global_component=repo['name'],
type=PDC_TYPES[repo['namespace']]
)
if DRY_RUN:
if DEBUG:
print('Querying {0} {1}'.format(branches_url, params))
rv = session.get(branches_url, params=params, timeout=60)
@@ -611,39 +588,29 @@ def _get_pdc_project_name_and_branches(session, namespace, repo):
if not rv.ok:
print(('The connection to "{0}" failed with the status code {1} and '
'output "{2}"'.format(branches_url, rv.status_code, rv.text)),
file = sys.stderr)
return repo, []
file=sys.stderr)
return []
data = rv.json()
return repo, [branch['name'] for branch in data['results']]
return [branch['name'] for branch in data['results']]
def _is_retired(product, project):
branches = project['branches']
if product == 'Fedora EPEL':
for branch, active in branches:
if re.match(r'^epel\d+$', branch):
if active:
return False
# No active branches means it is retired.
return True
else:
for branch, active in branches:
if active:
return False
return True
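The new _is_retired() expects project['branches'] to be a list of (branch name, active flag) pairs. Two illustrative calls with made-up projects, showing the EPEL special case:

# Hypothetical inputs; only the 'branches' key matters to _is_retired().
plain_rpm = {'branches': [('master', False), ('f27', False)]}
epel_rpm = {'branches': [('master', False), ('epel7', True)]}

assert _is_retired('Fedora', plain_rpm)          # no active branch at all
assert not _is_retired('Fedora EPEL', epel_rpm)  # epel7 is still active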
def _get_pagure_projects_from_page(session, namespace, page):
"""
Gets the names of all the Pagure projects on a page. This function is to be
used for mapping.
:param namespace: string of the namespace to query for projects
:param page: int of the page to query at
:return: list of projects on the page
"""
url = ('{0}/api/0/projects?namespace={1}&page={2}&per_page=100&'
'fork=false'.format(
PAGURE_DIST_GIT_URL.rstrip('/'), namespace, page))
if DRY_RUN:
print('- Querying {0}'.format(url))
response = session.get(url, timeout=120)
if not bool(response):
print("Failed to talk to %r %r." % (
response.request.url, response), file=sys.stderr)
raise RuntimeError('Failed to talk to {0} {1}.'.format(
response.request.url, response))
return response.json()['projects']
def _pagure_project_to_acl_schema(project_and_product, session=None):
def _to_legacy_schema(product_and_project, session=None):
"""
This function translates the JSON of a Pagure project to what PkgDB used to
output in the Bugzilla API. This function is used for mapping.
@@ -654,26 +621,19 @@ def _pagure_project_to_acl_schema(project_and_product, session=None):
:return: a dictionary of the content that the PkgDB Bugzilla API would
return
"""
project, product = project_and_product
product, project = product_and_project
if session is None:
session = retry_session()
watchers_rv_json = _get_watchers_rv_json(project)
user_cc_list = []
for user, watch_levels in watchers_rv_json['watchers'].items():
if user == 'releng':
continue
# Only people watching issues should be CC'd
if 'issues' in watch_levels:
user_cc_list.append(user)
owner = project['poc']
watchers = project['watchers']
summary = _get_package_summary_from_mdapi(
project['namespace'], project['name'], session)
# Check if the project is retired in PDC, and if so set assignee to orphan.
owner = project['access_users']['owner'][0]
if _is_retired_in_pdc(product, project):
if _is_retired(product, project):
owner = 'orphan'
# Check if the Bugzilla ticket assignee has been manually overridden
@@ -687,7 +647,7 @@ def _pagure_project_to_acl_schema(project_and_product, session=None):
# Groups is empty because you can't have groups watch projects.
# This is done only at the user level.
'groups': [],
'people': user_cc_list
'people': watchers,
},
'owner': owner,
# No package has this set in PkgDB's API, so it can be safely turned
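For orientation, the dictionary assembled here mimics what PkgDB's Bugzilla API used to return. Only a handful of its keys are visible in this diff, so the example below is a partial, assumed shape rather than the full schema; the name of the key holding the CC block is not visible and is shown as a placeholder:

# Partial, illustrative entry for one rpms/ project:
example_legacy_entry = {
    'product': 'Fedora',           # used to index projects_dict later on
    'project': 'python-requests',  # illustrative package name
    'owner': 'someowner',          # point of contact, or 'orphan' when retired
    'cc': {                        # placeholder name for the nested CC block
        'groups': [],              # groups cannot watch projects
        'people': ['watcher1'],    # users watching the project's issues
    },
}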
@@ -714,7 +674,7 @@ if __name__ == '__main__':
args = parser.parse_args()
if args.debug:
DRY_RUN = True
DEBUG = True
# Non-fatal errors to alert people about
errors = []
@@ -722,120 +682,81 @@ if __name__ == '__main__':
projects_dict = {
'Fedora': {},
'Fedora Container': {},
'Fedora Modules': {},
'Fedora EPEL': {},
}
session = retry_session()
pagure_namespace_to_project_lists = {}
pool = multiprocessing.pool.ThreadPool(8)
# Query for all the rpm and container projects and store them in
# pagure_namespace_to_projects
for namespace in ['rpms', 'container']:
first_page_url = ('{0}/api/0/projects?namespace={1}&fork=false&page=1'
'&per_page=1'.format(PAGURE_DIST_GIT_URL, namespace))
if DRY_RUN:
print('- Querying {0}'.format(first_page_url))
first_page_rv = session.get(first_page_url, timeout=120)
# Get the initial ownership and CC data from pagure
# This part is easy.
poc_url = PAGURE_DIST_GIT_URL + '/extras/pagure_poc.json'
if DEBUG:
print("Querying %r for points of contact." % poc_url)
pagure_namespace_to_poc = session.get(poc_url, timeout=120).json()
cc_url = PAGURE_DIST_GIT_URL + '/extras/pagure_bz.json'
if DEBUG:
print("Querying %r for initial cc list." % cc_url)
pagure_namespace_to_cc = session.get(cc_url, timeout=120).json()
if not bool(first_page_rv):
raise RuntimeError('Failed to talk to {0} {1}.'.format(
first_page_rv.request.url, first_page_rv))
# Combine and collapse those two into a single list:
pagure_projects = []
for namespace, entries in pagure_namespace_to_poc.items():
for name, poc in entries.items():
pagure_projects.append(dict(
namespace=namespace,
name=name,
poc=poc,
watchers=pagure_namespace_to_cc[namespace][name],
))
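The combine step above assumes both JSON blobs are keyed first by namespace and then by project name. A small hedged illustration of those shapes and of one resulting entry; the values are made up:

# Assumed shapes of pagure_poc.json and pagure_bz.json:
pagure_namespace_to_poc = {'rpms': {'python-requests': 'someowner'}}
pagure_namespace_to_cc = {'rpms': {'python-requests': ['watcher1', 'watcher2']}}

# One combined entry produced by the loop above would then look like:
# {'namespace': 'rpms', 'name': 'python-requests',
#  'poc': 'someowner', 'watchers': ['watcher1', 'watcher2']}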
total_projects = first_page_rv.json()['total_projects']
num_pages = int(ceil(total_projects / 100.0))
# Since we are going to multi-thread, we need to make a partial
# function call so that all the function needs is an iterable to run
p_get_pagure_projects_from_page = resilient_partial(
_get_pagure_projects_from_page, session, namespace)
pagure_namespace_to_project_lists[namespace] = pool.map(
p_get_pagure_projects_from_page, range(1, num_pages + 1))
# Filter out failures.
pagure_namespace_to_project_lists[namespace] = [
i for i in pagure_namespace_to_project_lists[namespace] if i]
# Flatten the list of lists (each page is a list of a projects)
pagure_namespace_to_projects = {}
for namespace in ['rpms', 'container']:
pagure_namespace_to_projects[namespace] = []
for project_list in pagure_namespace_to_project_lists[namespace]:
pagure_namespace_to_projects[namespace] += project_list
# This is no longer needed, so we can save some RAM
del pagure_namespace_to_project_lists
# Now, we must get all the branches for the RPM projects we just queried.
# This will be stored in pagure_rpm_project_branches as a dictionary of
# {'python-requests': ['master', 'f27', 'f26']}
pagure_rpm_project_names = [project['name'] for project in
pagure_namespace_to_projects['rpms']]
p_get_pdc_project_name_and_branches = resilient_partial(
_get_pdc_project_name_and_branches, session, 'rpms')
pagure_rpm_project_branches = pool.map(
p_get_pdc_project_name_and_branches, pagure_rpm_project_names)
# Filter out failures.
pagure_rpm_project_branches = [i for i in pagure_rpm_project_branches if i]
# Transform
pagure_rpm_project_branches = dict(pagure_rpm_project_branches)
# This is no longer needed, so we can save some RAM
del pagure_rpm_project_names
branches_url = PDCURL.split('rest_api')[0] + 'extras/active_branches.json'
if DEBUG:
print("Querying %r for EOL information." % branches_url)
pdc_branches = session.get(branches_url, timeout=120).json()
for proj in pagure_projects:
pdc_type = PDC_TYPES[proj['namespace']]
proj['branches'] = pdc_branches.get(pdc_type, {}).get(proj['name'], [])
if not proj['branches'] and DEBUG:
print("! No PDC branch found for {namespace}/{name}".format(**proj))
# Determine what products each project maps to based on its branches.
# pagure_rpms_project_products will be in the format of
# [('python-requests', 'Fedora'), ...] which will be used by a mapping
# function below
pagure_rpms_project_products = []
for project in pagure_namespace_to_projects['rpms']:
name = project['name']
products = []
branches = pagure_rpm_project_branches[name]
for branch in branches:
for project in pagure_projects:
products = set()
for branch, active in project['branches']:
if re.match(r'^epel\d+$', branch):
epel = True
products.append('Fedora EPEL')
products.add('Fedora EPEL')
else:
fedora = True
products.append('Fedora')
products.add(NAMESPACE_TO_PRODUCT[project['namespace']])
project['products'] = list(products)
if 'Fedora' in products and 'Fedora EPEL' in products:
break
for product in products:
pagure_rpms_project_products.append((project, product))
for project in pagure_namespace_to_projects['container']:
pagure_rpms_project_products.append((project, 'Fedora Container'))
# Save some RAM since this large dict is no longer needed
del pagure_namespace_to_projects
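The loop above turns each project's branch list plus its namespace into the set of Bugzilla products it belongs to. A worked example with hypothetical branch data:

# A hypothetical rpms/ project carrying both Fedora and EPEL branches:
demo_project = {'namespace': 'rpms',
                'branches': [('master', True), ('f27', True), ('epel7', True)]}

demo_products = set()
for branch, active in demo_project['branches']:
    if re.match(r'^epel\d+$', branch):
        demo_products.add('Fedora EPEL')
    else:
        demo_products.add(NAMESPACE_TO_PRODUCT[demo_project['namespace']])

assert demo_products == set(['Fedora', 'Fedora EPEL'])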
# Now, we must transform the data we collected into something that PkgDB
# would have returned
p_pagure_project_to_acl_schema = resilient_partial(
_pagure_project_to_acl_schema, session=session)
project_to_acl_schemas = pool.map(
p_pagure_project_to_acl_schema, pagure_rpms_project_products)
pool.close()
# Filter out failures.
project_to_acl_schemas = [i for i in project_to_acl_schemas if i]
# Transform the data returned in project_to_acl_schemas to be an orderly
# dictionary for ease of use later on.
for rv in project_to_acl_schemas:
projects_dict[rv['product']][rv['project']] = rv
# This is no longer needed, so we can save some RAM
del project_to_acl_schemas
## Now, we must transform the data we collected into something that PkgDB
## would have returned
p_to_legacy_schema = resilient_partial(_to_legacy_schema, session=session)
items = [
(product, project)
for project in pagure_projects
for product in project['products']
]
legacy_responses = pool.map(p_to_legacy_schema, items)
for response in legacy_responses:
if not response:
continue
projects_dict[response['product']][response['project']] = response
# Initialize the connection to bugzilla
bugzilla = Bugzilla(BZSERVER, BZUSER, BZPASS, projects_dict)
bugzilla = BugzillaProxy(BZSERVER, BZUSER, BZPASS, projects_dict)
for product in projects_dict.keys():
if product not in PRODUCTS:
continue
for pkg in sorted(projects_dict[product]):
if DRY_RUN:
print(pkg)
if DEBUG:
print("Assesssing bugzilla status for %r" % pkg)
pkgInfo = projects_dict[product][pkg]
try:
bugzilla.add_edit_component(
@@ -865,7 +786,7 @@ if __name__ == '__main__':
# Send notification of errors
if errors:
if DRY_RUN:
if DEBUG:
print('[DEBUG]', '\n'.join(errors))
else:
notify_users(errors)