diff --git a/distgit_bugzilla_sync/config.py b/distgit_bugzilla_sync/config.py index 67af449..8ac4773 100644 --- a/distgit_bugzilla_sync/config.py +++ b/distgit_bugzilla_sync/config.py @@ -23,17 +23,17 @@ import toml _here = os.path.dirname(__file__) -_default_conf_root = os.path.join(_here, 'default-config-files') -_system_conf_root = os.path.join(os.path.sep, 'etc', 'distgit-bugzilla-sync') +_default_conf_root = os.path.join(_here, "default-config-files") +_system_conf_root = os.path.join(os.path.sep, "etc", "distgit-bugzilla-sync") config_files = { - 'default': { - 'configuration': os.path.join(_default_conf_root, 'configuration.toml'), - 'email_overrides': os.path.join(_default_conf_root, 'email_overrides.toml'), + "default": { + "configuration": os.path.join(_default_conf_root, "configuration.toml"), + "email_overrides": os.path.join(_default_conf_root, "email_overrides.toml"), }, - 'system': { - 'configuration': os.path.join(_system_conf_root, 'configuration.toml'), - 'email_overrides': os.path.join(_system_conf_root, 'email_overrides.toml'), + "system": { + "configuration": os.path.join(_system_conf_root, "configuration.toml"), + "email_overrides": os.path.join(_system_conf_root, "email_overrides.toml"), }, } @@ -85,8 +85,10 @@ class ConfigDict(dict): self[k] = v -def load_configuration(addl_config_files: Optional[Sequence[str]] = None, - addl_email_overrides_files: Optional[Sequence[str]] = None): +def load_configuration( + addl_config_files: Optional[Sequence[str]] = None, + addl_email_overrides_files: Optional[Sequence[str]] = None, +): """Load stored configuration. This function loads default, system-wide, and if specified, additional @@ -99,14 +101,14 @@ def load_configuration(addl_config_files: Optional[Sequence[str]] = None, # Load default files. try: - default_config = toml.load(config_files['default']['configuration']) + default_config = toml.load(config_files["default"]["configuration"]) except FileNotFoundError as e: raise RuntimeError( f"Default configuration file {config_files['default']['configuration']} not found." ) from e try: - default_email_overrides = toml.load(config_files['default']['email_overrides']) + default_email_overrides = toml.load(config_files["default"]["email_overrides"]) except FileNotFoundError as e: raise RuntimeError( f"Default email overrides file {config_files['default']['email_overrides']} not found." @@ -114,12 +116,12 @@ def load_configuration(addl_config_files: Optional[Sequence[str]] = None, # Attempt to load system-wide files. try: - system_config = toml.load(config_files['system']['configuration']) + system_config = toml.load(config_files["system"]["configuration"]) except FileNotFoundError: system_config = {} try: - system_email_overrides = toml.load(config_files['system']['email_overrides']) + system_email_overrides = toml.load(config_files["system"]["email_overrides"]) except FileNotFoundError: system_email_overrides = {} @@ -152,10 +154,10 @@ def load_configuration(addl_config_files: Optional[Sequence[str]] = None, email_overrides.update(system_email_overrides) email_overrides.update(addl_email_overrides) - for env in config['environments'].values(): + for env in config["environments"].values(): # Fill environments with default data. env_values = copy.deepcopy(config) - del env_values['environments'] + del env_values["environments"] # Values specified in the environments should take precedence. env_values.update(env) diff --git a/distgit_bugzilla_sync/package_summaries.py b/distgit_bugzilla_sync/package_summaries.py index 63089ec..78f0e1a 100644 --- a/distgit_bugzilla_sync/package_summaries.py +++ b/distgit_bugzilla_sync/package_summaries.py @@ -22,44 +22,48 @@ from defusedxml import cElementTree as etree import requests -KOJI_REPO = 'https://kojipkgs.fedoraproject.org/repos/' +KOJI_REPO = "https://kojipkgs.fedoraproject.org/repos/" repomd_xml_namespace = { - 'repo': 'http://linux.duke.edu/metadata/repo', - 'rpm': 'http://linux.duke.edu/metadata/rpm', + "repo": "http://linux.duke.edu/metadata/repo", + "rpm": "http://linux.duke.edu/metadata/rpm", } log = logging.getLogger(__name__) def download_db(name, repomd_url, archive): - log.info('%-12s Downloading file: %s to %s', name, repomd_url, archive) + log.info("%-12s Downloading file: %s to %s", name, repomd_url, archive) response = requests.get(repomd_url, verify=True) - with open(archive, 'wb') as stream: + with open(archive, "wb") as stream: stream.write(response.content) def decompress_db(name, archive, location): - ''' Decompress the given archive at the specified location. ''' - log.info('%-12s Extracting %s to %s', name, archive, location) - if archive.endswith('.xz'): + """ Decompress the given archive at the specified location. """ + log.info("%-12s Extracting %s to %s", name, archive, location) + if archive.endswith(".xz"): import lzma + with contextlib.closing(lzma.LZMAFile(archive)) as stream_xz: data = stream_xz.read() - with open(location, 'wb') as stream: + with open(location, "wb") as stream: stream.write(data) - elif archive.endswith('.tar.gz'): + elif archive.endswith(".tar.gz"): import tarfile + with tarfile.open(archive) as tar: tar.extractall(path=location) - elif archive.endswith('.gz'): + elif archive.endswith(".gz"): import gzip - with open(location, 'wb') as out: - with gzip.open(archive, 'rb') as inp: + + with open(location, "wb") as out: + with gzip.open(archive, "rb") as inp: out.write(inp.read()) - elif archive.endswith('.bz2'): + elif archive.endswith(".bz2"): import bz2 - with open(location, 'wb') as out: + + with open(location, "wb") as out: bzar = bz2.BZ2File(archive) out.write(bzar.read()) bzar.close() @@ -68,9 +72,9 @@ def decompress_db(name, archive, location): def needs_update(local_file, remote_sha, sha_type): - ''' Compare hash of a local and remote file. + """ Compare hash of a local and remote file. Return True if our local file needs to be updated. - ''' + """ if not os.path.isfile(local_file): # If we have never downloaded this before, then obviously it has @@ -78,11 +82,11 @@ def needs_update(local_file, remote_sha, sha_type): return True # Old epel5 doesn't even know which sha it is using... - if sha_type == 'sha': - sha_type = 'sha1' + if sha_type == "sha": + sha_type = "sha1" hashobj = getattr(hashlib, sha_type)() - with open(local_file, 'rb') as f: + with open(local_file, "rb") as f: hashobj.update(f.read()) local_sha = hashobj.hexdigest() @@ -93,24 +97,24 @@ def needs_update(local_file, remote_sha, sha_type): def get_primary_xml(destfolder, url, name): - ''' Retrieve the repo metadata at the given url and store them using + """ Retrieve the repo metadata at the given url and store them using the provided name. - ''' - repomd_url = url + '/repomd.xml' + """ + repomd_url = url + "/repomd.xml" response = requests.get(repomd_url, verify=True) if not bool(response): - log.warning('%-12s !! Failed to get %s %s', name, repomd_url, response) + log.warning("%-12s !! Failed to get %s %s", name, repomd_url, response) return try: root = etree.fromstring(response.text) except ParseError: - log.warning('%-12s !! Failed to parse %s %s', name, repomd_url, response) + log.warning("%-12s !! Failed to parse %s %s", name, repomd_url, response) return data_nodes = list(root.findall('repo:data[@type="primary"]', repomd_xml_namespace)) if not data_nodes: - log.debug('No primary.xml could be found in %s', url) + log.debug("No primary.xml could be found in %s", url) return elif len(data_nodes) > 1: log.debug("More than one primary.xml could be found in %s", url) @@ -118,21 +122,21 @@ def get_primary_xml(destfolder, url, name): primary_node = data_nodes[0] - location_node = primary_node.find('repo:location', repomd_xml_namespace) - if location_node is None or 'href' not in location_node.attrib: - log.debug('No valid location found for primary.xml in %s', url) + location_node = primary_node.find("repo:location", repomd_xml_namespace) + if location_node is None or "href" not in location_node.attrib: + log.debug("No valid location found for primary.xml in %s", url) return - cksuminfo_node = primary_node.find('repo:open-checksum', repomd_xml_namespace) - if cksuminfo_node is None or 'type' not in cksuminfo_node.attrib: - log.debug('No valid checksum information found for primary.xml in %s', url) + cksuminfo_node = primary_node.find("repo:open-checksum", repomd_xml_namespace) + if cksuminfo_node is None or "type" not in cksuminfo_node.attrib: + log.debug("No valid checksum information found for primary.xml in %s", url) return - filename = location_node.attrib['href'].replace('repodata/', '') + filename = location_node.attrib["href"].replace("repodata/", "") hash_digest = cksuminfo_node.text - hash_type = cksuminfo_node.attrib['type'] + hash_type = cksuminfo_node.attrib["type"] - repomd_url = url + '/' + filename + repomd_url = url + "/" + filename # First, determine if the file has changed by comparing hash db = "distgit-bugzilla-sync-primary.xml" @@ -140,7 +144,7 @@ def get_primary_xml(destfolder, url, name): # Have we downloaded this before? Did it change? destfile = os.path.join(destfolder, db) if not needs_update(destfile, hash_digest, hash_type): - log.debug('%s No change of %s', name.ljust(12), repomd_url) + log.debug("%s No change of %s", name.ljust(12), repomd_url) else: # If it has changed, then download it and move it into place. archive = os.path.join(destfolder, filename) @@ -157,12 +161,10 @@ def get_package_summaries(): start = time.time() primary_xml = get_primary_xml( - "/var/tmp", - KOJI_REPO + 'rawhide/latest/x86_64/repodata', - "koji", + "/var/tmp", KOJI_REPO + "rawhide/latest/x86_64/repodata", "koji", ) - context = etree.iterparse(primary_xml, events=('start', 'end')) + context = etree.iterparse(primary_xml, events=("start", "end")) root = None @@ -172,9 +174,13 @@ def get_package_summaries(): root = elem continue - if event == 'end' and elem.tag == 'package' and elem.get('type', 'rpm') == 'rpm': - name = elem.findtext('name') - summary = elem.findtext('summary') + if ( + event == "end" + and elem.tag == "package" + and elem.get("type", "rpm") == "rpm" + ): + name = elem.findtext("name") + summary = elem.findtext("summary") if name is not None and summary is not None: summaries[name] = summary # remove package child from root element to keep memory consumption low diff --git a/distgit_bugzilla_sync/script.py b/distgit_bugzilla_sync/script.py index 5fabcec..3e5d5f6 100644 --- a/distgit_bugzilla_sync/script.py +++ b/distgit_bugzilla_sync/script.py @@ -22,12 +22,12 @@ # Author(s): Ralph Bean # -''' +""" sync information from the Pagure into bugzilla This ... script takes information about package onwership and imports it into bugzilla. -''' +""" import argparse import collections @@ -52,15 +52,13 @@ from fedora.client.fas2 import AccountSystem import requests from requests.adapters import HTTPAdapter from urllib3.util import Retry -import yaml from . import package_summaries from .config import config, email_overrides, load_configuration cache = dogpile.cache.make_region().configure( - 'dogpile.cache.memory', - expiration_time=3600, + "dogpile.cache.memory", expiration_time=3600, ) @@ -74,8 +72,8 @@ def retry_session(): status_forcelist=(500, 502, 504), ) adapter = HTTPAdapter(max_retries=retry) - session.mount('http://', adapter) - session.mount('https://', adapter) + session.mount("http://", adapter) + session.mount("https://", adapter) return session @@ -89,6 +87,7 @@ def resilient_partial(fn, *initial, **kwargs): want to know about it, but we don't want it to kill the whole program. So catch its error, log it, but proceed. """ + def wrapper(*additional): try: full = initial + additional @@ -96,34 +95,33 @@ def resilient_partial(fn, *initial, **kwargs): except Exception: traceback.print_exc() return None + wrapper.__name__ = fn.__name__ wrapper.__doc__ = fn.__doc__ return wrapper class DataChangedError(Exception): - '''Raised when data we are manipulating changes while we're modifying it.''' + """Raised when data we are manipulating changes while we're modifying it.""" + pass def segment(iterable, chunk, fill=None): - '''Collect data into `chunk` sized block''' + """Collect data into `chunk` sized block""" args = [iter(iterable)] * chunk return itertools.zip_longest(*args, fillvalue=fill) class BugzillaProxy: - - def __init__(self, bz_server, username, password, config, - pre_cache_users=True): + def __init__(self, bz_server, username, password, config, pre_cache_users=True): self.bz_xmlrpc_server = bz_server self.username = username self.password = password self.server = Bugzilla( - url=self.bz_xmlrpc_server, - user=self.username, - password=self.password) + url=self.bz_xmlrpc_server, user=self.username, password=self.password + ) self.product_cache = {} self.user_cache = {} @@ -132,22 +130,23 @@ class BugzillaProxy: # Connect to the fedora account system self.fas = AccountSystem( - base_url=config['fas']['url'], - username=config['fas']['username'], - password=config['fas']['password']) + base_url=config["fas"]["url"], + username=config["fas"]["username"], + password=config["fas"]["password"], + ) self.config = config if not pre_cache_users: return - if config['verbose']: + if config["verbose"]: print("Pre-caching FAS users and their Bugzilla email addresses.") try: self.user_cache = self.fas.people_by_key( - key='username', - fields=['bugzilla_email']) + key="username", fields=["bugzilla_email"] + ) except fedora.client.ServerError: # Sometimes, building the user cache up front fails with a timeout. # It's ok, we build the cache as-needed later in the script. @@ -158,33 +157,36 @@ class BugzillaProxy: def build_product_cache(self, pagure_projects): """ Cache the bugzilla info about each package in each product. """ + def _query_component(query, num_attempts=5): for i in range(num_attempts): try: - raw_data = self.server._proxy.Component.get({'names': query}) + raw_data = self.server._proxy.Component.get({"names": query}) break except Exception as e: if i >= num_attempts - 1: raise - if self.config['verbose']: + if self.config["verbose"]: print(f" ERROR {e}") print(" - Query failed, going to try again in 20 seconds") # Wait 20 seconds and try again time.sleep(20) return raw_data - if self.config['bugzilla']['compat_api'] == 'getcomponentsdetails': + if self.config["bugzilla"]["compat_api"] == "getcomponentsdetails": # Old API -- in python-bugzilla. But with current server, this # gives ProxyError for collection, product in self.config["products"].items(): - bz_product_name = product.get('bz_product_name', collection) - self.product_cache[collection] = self.server.getcomponentsdetails(bz_product_name) - elif self.config['bugzilla']['compat_api'] == 'component.get': + bz_product_name = product.get("bz_product_name", collection) + self.product_cache[collection] = self.server.getcomponentsdetails( + bz_product_name + ) + elif self.config["bugzilla"]["compat_api"] == "component.get": # Way that's undocumented in the partner-bugzilla api but works # currently - chunk = self.config['bugzilla']['req_segment'] + chunk = self.config["bugzilla"]["req_segment"] for collection, product in self.config["products"].items(): - bz_product_name = product.get('bz_product_name', collection) + bz_product_name = product.get("bz_product_name", collection) # restrict the list of info returned to only the packages of # interest pkglist = [ @@ -198,26 +200,27 @@ class BugzillaProxy: # Format that bugzilla will understand. Strip None's that # segment() pads out the final data segment() with query = [ - {'product': bz_product_name, 'component': p} + {"product": bz_product_name, "component": p} for p in pkg_segment if p is not None ] - if self.config['verbose']: + if self.config["verbose"]: print( f" - Querying product `{bz_product_name}`, " - f"query {cnt} of {estimated_size}") + f"query {cnt} of {estimated_size}" + ) raw_data = _query_component(query) - for package in raw_data['components']: + for package in raw_data["components"]: # Reformat data to be the same as what's returned from # getcomponentsdetails product_info = { - 'initialowner': package['default_assignee'], - 'description': package['description'], - 'initialqacontact': package['default_qa_contact'], - 'initialcclist': package['default_cc'], - 'is_active': package['is_active'], + "initialowner": package["default_assignee"], + "description": package["description"], + "initialqacontact": package["default_qa_contact"], + "initialcclist": package["default_cc"], + "is_active": package["is_active"], } - product_info_by_pkg[package['name'].lower()] = product_info + product_info_by_pkg[package["name"].lower()] = product_info self.product_cache[collection] = product_info_by_pkg def invert_user_cache(self): @@ -225,53 +228,60 @@ class BugzillaProxy: that the bugzilla_email is the key and the username the value. """ for username in self.user_cache: - bz_email = self.user_cache[username]['bugzilla_email'].lower() + bz_email = self.user_cache[username]["bugzilla_email"].lower() self.inverted_user_cache[bz_email] = username def _get_bugzilla_email(self, username): - '''Return the bugzilla email address for a user. + """Return the bugzilla email address for a user. First looks in a cache for a username => bugzilla email. If not found, reloads the cache from fas and tries again. - ''' + """ try: - bz_email = self.user_cache[username]['bugzilla_email'].lower() + bz_email = self.user_cache[username]["bugzilla_email"].lower() except KeyError: - if username.startswith('@'): + if username.startswith("@"): group = self.fas.group_by_name(username[1:]) bz_email = group.mailing_list if bz_email is None: return - self.user_cache[username] = { - 'bugzilla_email': bz_email} + self.user_cache[username] = {"bugzilla_email": bz_email} else: person = self.fas.person_by_username(username) - bz_email = person.get('bugzilla_email', None) + bz_email = person.get("bugzilla_email", None) if bz_email is None: return - self.user_cache[username] = {'bugzilla_email': bz_email} + self.user_cache[username] = {"bugzilla_email": bz_email} bz_email = bz_email.lower() self.inverted_user_cache[bz_email] = username bz_email = email_overrides.get(bz_email, bz_email) return bz_email def update_open_bugs(self, new_poc, prev_poc, product, name, print_fas_names=False): - '''Change the package owner + """Change the package owner :arg new_poc: email of the new point of contact. :arg prev_poc: Username of the previous point of contact :arg product: The product of the package to change in bugzilla :arg name: Name of the package to change the owner. :kwarg print_fas_names: Boolean specifying wether to print email or FAS names (if these could be found). - ''' + """ bz_query = {} - bz_query['product'] = product - bz_query['component'] = name - bz_query['bug_status'] = [ - 'NEW', 'ASSIGNED', 'ON_DEV', 'ON_QA', 'MODIFIED', 'POST', - 'FAILS_QA', 'PASSES_QA', 'RELEASE_PENDING'] + bz_query["product"] = product + bz_query["component"] = name + bz_query["bug_status"] = [ + "NEW", + "ASSIGNED", + "ON_DEV", + "ON_QA", + "MODIFIED", + "POST", + "FAILS_QA", + "PASSES_QA", + "RELEASE_PENDING", + ] # Update only maintained releases - bz_query['version'] = self.config["products"][product]["versions"] + bz_query["version"] = self.config["products"][product]["versions"] def _query_bz(query, num_attempts=5): for i in range(num_attempts): @@ -281,7 +291,7 @@ class BugzillaProxy: except Exception as e: if i >= num_attempts - 1: raise - if self.config['verbose']: + if self.config["verbose"]: print(f" ERROR {e}") print(" - Query failed, going to try again in 20 seconds") # Wait 20 seconds and try again @@ -298,34 +308,42 @@ class BugzillaProxy: if old_poc in self.inverted_user_cache: old_poc = self.inverted_user_cache[old_poc] else: - old_poc = old_poc.split('@', 1)[0] + "@..." + old_poc = old_poc.split("@", 1)[0] + "@..." if new_poc in self.inverted_user_cache: new_poc = self.inverted_user_cache[new_poc] else: - new_poc = new_poc.split('@', 1)[0] + "@..." + new_poc = new_poc.split("@", 1)[0] + "@..." print( - f'[UPDATEBUG] {product}/{name} reassigning bug #{bug.bug_id} ' - f'from {old_poc} to {new_poc}' + f"[UPDATEBUG] {product}/{name} reassigning bug #{bug.bug_id} " + f"from {old_poc} to {new_poc}" ) if not self.config["dryrun"]: try: bug.setassignee( assigned_to=new_poc, - comment=self.config['bz_maintainer_change_comment'], + comment=self.config["bz_maintainer_change_comment"], ) except xmlrpc.client.Fault as e: # Output something useful in args e.args = (new_poc, e.faultCode, e.faultString) raise except xmlrpc.client.ProtocolError as e: - e.args = ('ProtocolError', e.errcode, e.errmsg) + e.args = ("ProtocolError", e.errcode, e.errmsg) raise - def add_edit_component(self, package, collection, owner, description=None, - qacontact=None, cclist=None, print_fas_names=False, - retired=False): - '''Add or update a component to have the values specified. - ''' + def add_edit_component( + self, + package, + collection, + owner, + description=None, + qacontact=None, + cclist=None, + print_fas_names=False, + retired=False, + ): + """Add or update a component to have the values specified. + """ # Turn the cclist into something usable by bugzilla initial_cc_emails = [] initial_cc_emails_lower = [] @@ -342,8 +360,10 @@ class BugzillaProxy: f"({collection}/{package})" ) if self.config["verbose"]: - print(f"** {watcher} has no bugzilla_email or mailing_list set " - f"({collection}/{package}) **") + print( + f"** {watcher} has no bugzilla_email or mailing_list set " + f"({collection}/{package}) **" + ) # Add owner to the cclist so comaintainers taking over a bug don't # have to do this manually @@ -361,14 +381,18 @@ class BugzillaProxy: e.args = (e.faultCode, e.faultString) raise except xmlrpc.client.ProtocolError as e: - e.args = ('ProtocolError', e.errcode, e.errmsg) + e.args = ("ProtocolError", e.errcode, e.errmsg) raise - bz_product_name = self.config['products'][collection].get('bz_product_name', collection) + bz_product_name = self.config["products"][collection].get( + "bz_product_name", collection + ) # Set the qacontact_email and name - default_qa_contact_email = self.config['default_qa_contact_email'] - default_qa_contact = f"" + default_qa_contact_email = self.config["default_qa_contact_email"] + default_qa_contact = ( + f"" + ) if qacontact: qacontact_email = self._get_bugzilla_email(qacontact) else: @@ -381,33 +405,43 @@ class BugzillaProxy: data = {} # Check for changes to the owner, qacontact, or description - if product[pkg_key]['initialowner'].lower() != owner_email.lower(): - data['initialowner'] = owner_email + if product[pkg_key]["initialowner"].lower() != owner_email.lower(): + data["initialowner"] = owner_email - if description and product[pkg_key]['description'] != description: - data['description'] = description + if description and product[pkg_key]["description"] != description: + data["description"] = description - if qacontact and product[pkg_key]['initialqacontact'].lower() != qacontact_email.lower(): - data['initialqacontact'] = qacontact_email + if ( + qacontact + and product[pkg_key]["initialqacontact"].lower() + != qacontact_email.lower() + ): + data["initialqacontact"] = qacontact_email - if len(product[pkg_key]['initialcclist']) != len(initial_cc_emails): - data['initialcclist'] = initial_cc_emails + if len(product[pkg_key]["initialcclist"]) != len(initial_cc_emails): + data["initialcclist"] = initial_cc_emails else: - for cc_member in product[pkg_key]['initialcclist']: + for cc_member in product[pkg_key]["initialcclist"]: if cc_member.lower() not in initial_cc_emails_lower: - data['initialcclist'] = initial_cc_emails + data["initialcclist"] = initial_cc_emails break - if product[pkg_key]['is_active'] != (not retired): - data['is_active'] = not retired + if product[pkg_key]["is_active"] != (not retired): + data["is_active"] = not retired if data: # Changes occurred. Submit a request to change via xmlrpc - data['product'] = bz_product_name - data['component'] = package + data["product"] = bz_product_name + data["component"] = package if self.config["verbose"]: - for key in ["initialowner", "description", "initialqacontact", "initialcclist", "is_active"]: + for key in [ + "initialowner", + "description", + "initialqacontact", + "initialcclist", + "is_active", + ]: if data.get(key) is not None: old_value = product[pkg_key][key] @@ -417,12 +451,14 @@ class BugzillaProxy: if isinstance(new_value, list): new_value = sorted(new_value) - if print_fas_names and key in ('initialowner', - 'initialqacontact', - 'initialcclist'): - if key == 'initialowner': + if print_fas_names and key in ( + "initialowner", + "initialqacontact", + "initialcclist", + ): + if key == "initialowner": new_value = owner - elif key == 'initialqacontact': + elif key == "initialqacontact": new_value = qacontact else: new_value = sorted(initial_cc_fasnames) @@ -430,27 +466,35 @@ class BugzillaProxy: from_fas_names = [] for email in product[pkg_key][key]: if email in self.inverted_user_cache: - from_fas_names.append(self.inverted_user_cache[email]) + from_fas_names.append( + self.inverted_user_cache[email] + ) elif email == default_qa_contact_email: from_fas_names.append(default_qa_contact) if from_fas_names: if len(from_fas_names) < len(product[pkg_key][key]): - x = len(product[pkg_key][key]) - len(from_fas_names) + x = len(product[pkg_key][key]) - len( + from_fas_names + ) from_fas_names.append(f"And {x} more") old_value = f"from `{from_fas_names}`" else: old_value = "" - print(f"[EDITCOMP] {data['product']}/{data['component']}" - f" {key} changed {old_value} to FAS name(s) `{new_value}`") + print( + f"[EDITCOMP] {data['product']}/{data['component']}" + f" {key} changed {old_value} to FAS name(s) `{new_value}`" + ) else: - print(f"[EDITCOMP] {data['product']}/{data['component']}" - f" {key} changed from `{old_value}` to `{new_value}`") + print( + f"[EDITCOMP] {data['product']}/{data['component']}" + f" {key} changed from `{old_value}` to `{new_value}`" + ) owner_changed = "initialowner" in data # FIXME: initialowner has been made mandatory for some # reason. Asking dkl why. - data['initialowner'] = owner_email + data["initialowner"] = owner_email def edit_component(data, num_attempts=5): for i in range(num_attempts): @@ -458,15 +502,20 @@ class BugzillaProxy: self.server.editcomponent(data) break except Exception as e: - if isinstance(e, xmlrpc.client.Fault) and e.faultCode == 504: - if self.config['verbose']: + if ( + isinstance(e, xmlrpc.client.Fault) + and e.faultCode == 504 + ): + if self.config["verbose"]: print(f" ERROR {e}") raise if i >= num_attempts - 1: raise - if self.config['verbose']: + if self.config["verbose"]: print(f" ERROR {e}") - print(" - Query failed, going to try again in 20 seconds") + print( + " - Query failed, going to try again in 20 seconds" + ) # Wait 20 seconds and try again time.sleep(20) @@ -478,13 +527,13 @@ class BugzillaProxy: e.args = (data, e.faultCode, e.faultString) raise except xmlrpc.client.ProtocolError as e: - e.args = ('ProtocolError', e.errcode, e.errmsg) + e.args = ("ProtocolError", e.errcode, e.errmsg) raise if owner_changed: self.update_open_bugs( new_poc=owner_email, - prev_poc=product[pkg_key]['initialowner'], + prev_poc=product[pkg_key]["initialowner"], name=package, product=bz_product_name, print_fas_names=print_fas_names, @@ -494,39 +543,50 @@ class BugzillaProxy: print(f"[NOCHANGE] {package}/{bz_product_name}") else: if retired: - if self.config['verbose']: + if self.config["verbose"]: print(f"[NOADD] {bz_product_name}/{package} (is retired)") return # Add component data = { - 'product': bz_product_name, - 'component': package, - 'description': description or 'NA', - 'initialowner': owner_email, - 'initialqacontact': qacontact_email, - 'is_active': not retired, + "product": bz_product_name, + "component": package, + "description": description or "NA", + "initialowner": owner_email, + "initialqacontact": qacontact_email, + "is_active": not retired, } if initial_cc_emails: - data['initialcclist'] = initial_cc_emails + data["initialcclist"] = initial_cc_emails if self.config["verbose"]: - for key in ["initialowner", "description", "initialqacontact", - "initialcclist", "is_active"]: - if print_fas_names and key in ('initialowner', - 'initialqacontact', - 'initialcclist'): - if key == 'initialowner': + for key in [ + "initialowner", + "description", + "initialqacontact", + "initialcclist", + "is_active", + ]: + if print_fas_names and key in ( + "initialowner", + "initialqacontact", + "initialcclist", + ): + if key == "initialowner": value = owner - elif key == 'initialqacontact': + elif key == "initialqacontact": value = qacontact else: value = initial_cc_fasnames - print(f"[ADDCOMP] {bz_product_name}/{package}" - f" {key} set to FAS name(s) `{value}`") + print( + f"[ADDCOMP] {bz_product_name}/{package}" + f" {key} set to FAS name(s) `{value}`" + ) else: - print(f"[ADDCOMP] {bz_product_name}/{package}" - f" {key} set to {data.get(key)}") + print( + f"[ADDCOMP] {bz_product_name}/{package}" + f" {key} set to {data.get(key)}" + ) def add_component(data, num_attempts=5): for i in range(num_attempts): @@ -534,17 +594,22 @@ class BugzillaProxy: self.server.addcomponent(data) break except Exception as e: - if isinstance(e, xmlrpc.client.Fault) and e.faultCode in [504, 1200]: + if isinstance(e, xmlrpc.client.Fault) and e.faultCode in [ + 504, + 1200, + ]: # error 504: user is not a valid username # error 1200: Product has already a component named - if self.config['verbose']: + if self.config["verbose"]: print(f" ERROR {e}") raise if i >= num_attempts - 1: raise - if self.config['verbose']: + if self.config["verbose"]: print(f" ERROR {e}") - print(" - Query failed, going to try again in 20 seconds") + print( + " - Query failed, going to try again in 20 seconds" + ) # Wait 20 seconds and try again time.sleep(20) @@ -563,31 +628,34 @@ def _get_pdc_branches(session, repo): :param repo: the project dict :return: a list of the repo's branches """ - branches_url = '{0}component-branches/'.format(env['pdc_url']) + branches_url = "{0}component-branches/".format(env["pdc_url"]) params = { - 'global_component': repo['name'], - 'type': env['pdc_types'][repo['namespace']], + "global_component": repo["name"], + "type": env["pdc_types"][repo["namespace"]], } if config["verbose"]: - print('Querying {0} {1}'.format(branches_url, params)) + print("Querying {0} {1}".format(branches_url, params)) rv = session.get(branches_url, params=params, timeout=60) # If the project's branches can't be reported, just return no branches and # it will be skipped later on if not rv.ok: - print(('The connection to "{0}" failed with the status code {1} and ' - 'output "{2}"'.format(branches_url, rv.status_code, rv.text)), - file=sys.stderr) + print( + ( + 'The connection to "{0}" failed with the status code {1} and ' + 'output "{2}"'.format(branches_url, rv.status_code, rv.text) + ), + file=sys.stderr, + ) return [] data = rv.json() - return [branch['name'] for branch in data['results']] + return [branch["name"] for branch in data["results"]] class ScriptExecError(RuntimeError): - def __init__(self, *args, **kwargs): - self.errorcode = kwargs.pop('errorcode', 1) + self.errorcode = kwargs.pop("errorcode", 1) super().__init__(*args, **kwargs) @@ -599,51 +667,57 @@ class DistgitBugzillaSync: _branch_regex_to_product = None def send_email(self, from_address, to_address, subject, message, cc_address=None): - '''Send an email if there's an error. + """Send an email if there's an error. This will be replaced by sending messages to a log later. - ''' - if not self.env['email']['send_mails']: + """ + if not self.env["email"]["send_mails"]: return msg = EmailMessage() - msg.add_header('To', ','.join(to_address)) - msg.add_header('From', from_address) - msg.add_header('Subject', subject) + msg.add_header("To", ",".join(to_address)) + msg.add_header("From", from_address) + msg.add_header("Subject", subject) if cc_address is not None: - msg.add_header('Cc', ','.join(cc_address)) + msg.add_header("Cc", ",".join(cc_address)) to_address += cc_address msg.set_payload(message) - smtp = smtplib.SMTP(self.env['email']['smtp_host']) + smtp = smtplib.SMTP(self.env["email"]["smtp_host"]) smtp.sendmail(from_address, to_address, msg.as_string()) smtp.quit() def notify_users(self, errors): - ''' Browse the list of errors and when we can retrieve the email + """ Browse the list of errors and when we can retrieve the email address, use it to notify the user about the issue. - ''' + """ data = {} - if os.path.exists(self.env['data_cache']): + if os.path.exists(self.env["data_cache"]): try: - with open(self.env['data_cache']) as stream: + with open(self.env["data_cache"]) as stream: data = json.load(stream) except Exception as err: - print('Could not read the json file at %s: \nError: %s' % ( - env['data_cache'], err)) + print( + "Could not read the json file at %s: \nError: %s" + % (env["data_cache"], err) + ) new_data = {} seen = [] for error in errors: notify_user = False - if 'The name ' in error and ' is not a valid username' in error: - user_email = error.split(' is not a valid username')[0].split( - 'The name ')[1].strip() + if "The name " in error and " is not a valid username" in error: + user_email = ( + error.split(" is not a valid username")[0] + .split("The name ")[1] + .strip() + ) now = datetime.datetime.utcnow() # See if we already know about this user - if user_email in data and data[user_email]['last_update']: + if user_email in data and data[user_email]["last_update"]: last_update = datetime.datetime.fromtimestamp( - int(data[user_email]['last_update'])) + int(data[user_email]["last_update"]) + ) # Only notify users once per hour if (now - last_update).seconds >= 3600: notify_user = True @@ -661,63 +735,94 @@ class DistgitBugzillaSync: if notify_user: self.send_email( - self.env['email']['from'], + self.env["email"]["from"], [user_email], - subject='Please fix your bugzilla.redhat.com account', - message=self.env['email']['templates']['user_notification'], - cc_address=self.env['email']['notify_admins'], + subject="Please fix your bugzilla.redhat.com account", + message=self.env["email"]["templates"]["user_notification"], + cc_address=self.env["email"]["notify_admins"], ) - new_data[user_email] = { - 'last_update': time.mktime(now.timetuple()) - } + new_data[user_email] = {"last_update": time.mktime(now.timetuple())} - with open(env['data_cache'], 'w') as stream: + with open(env["data_cache"], "w") as stream: json.dump(new_data, stream) def get_cli_arguments(self): """ Set the CLI argument parser and return the argument parsed. """ parser = argparse.ArgumentParser( - description='Script syncing information between Pagure and bugzilla' + description="Script syncing information between Pagure and bugzilla" ) parser.add_argument( - '--dry-run', dest='dryrun', action='store_true', default=False, - help='Do not actually make any changes - Overrides the configuration') + "--dry-run", + dest="dryrun", + action="store_true", + default=False, + help="Do not actually make any changes - Overrides the configuration", + ) parser.add_argument( - '--verbose', dest='verbose', action='store_true', default=False, - help='Print actions verbosely - Overrides the configuration') + "--verbose", + dest="verbose", + action="store_true", + default=False, + help="Print actions verbosely - Overrides the configuration", + ) parser.add_argument( - '--debug', dest='debug', action='store_true', default=False, - help='Combination of --verbose and --dry-run') + "--debug", + dest="debug", + action="store_true", + default=False, + help="Combination of --verbose and --dry-run", + ) parser.add_argument( - '--env', dest='env', - help='Run the script for a specific environment, overrides the one ' - 'set in the configuration file') + "--env", + dest="env", + help="Run the script for a specific environment, overrides the one " + "set in the configuration file", + ) parser.add_argument( - '--add-config-file', metavar='CONFIG_FILE', - dest='addl_config_files', action='append', - help="File(s) from which to read overriding configuration") + "--add-config-file", + metavar="CONFIG_FILE", + dest="addl_config_files", + action="append", + help="File(s) from which to read overriding configuration", + ) parser.add_argument( - '--add-email-overrides-file', metavar='EMAIL_OVERRIDES_FILE', - dest='addl_email_overrides_files', action='append', - help="File(s) from which to read additional email overrides") + "--add-email-overrides-file", + metavar="EMAIL_OVERRIDES_FILE", + dest="addl_email_overrides_files", + action="append", + help="File(s) from which to read additional email overrides", + ) parser.add_argument( - '-p', '--project', dest='projects', nargs='+', - help='Update one or more projects (provided as namespace/name), ' - 'in all of its products') + "-p", + "--project", + dest="projects", + nargs="+", + help="Update one or more projects (provided as namespace/name), " + "in all of its products", + ) parser.add_argument( - '--print-fas-names', action='store_true', default=False, + "--print-fas-names", + action="store_true", + default=False, help="Print FAS names rather than email addresses in output, useful when pasting into " - "public fora") + "public fora", + ) parser.add_argument( - '--print-no-change', action='store_true', default=False, - help="Print elements that are not being changed as they are checked") + "--print-no-change", + action="store_true", + default=False, + help="Print elements that are not being changed as they are checked", + ) parser.add_argument( - '--no-user-notifications', dest="user_notifications", action='store_false', + "--no-user-notifications", + dest="user_notifications", + action="store_false", default=True, help="Do not notify every packager whose account is wrongly set-up, but do send the " - "overall report to the admins") + "overall report to the admins", + ) self.args = parser.parse_args() @@ -733,12 +838,12 @@ class DistgitBugzillaSync: # Get the initial ownership and CC data from pagure # This part is easy. - poc_url = self.env['distgit_url'] + '/extras/pagure_poc.json' + poc_url = self.env["distgit_url"] + "/extras/pagure_poc.json" if self.env["verbose"]: print("Querying %r for points of contact." % poc_url) pagure_namespace_to_poc = self.session.get(poc_url, timeout=120).json() - cc_url = self.env['distgit_url'] + '/extras/pagure_bz.json' + cc_url = self.env["distgit_url"] + "/extras/pagure_bz.json" if self.env["verbose"]: print("Querying %r for initial cc list." % cc_url) pagure_namespace_to_cc = self.session.get(cc_url, timeout=120).json() @@ -750,20 +855,23 @@ class DistgitBugzillaSync: for namespace, entries in pagure_namespace_to_poc.items(): for name, poc in entries.items(): if not project_list or (namespace, name) in project_list: - self.pagure_projects.append({ - 'namespace': namespace, - 'name': name, - 'poc': poc['fedora'], - 'epelpoc': poc['epel'], - 'watchers': pagure_namespace_to_cc[namespace][name], - }) + self.pagure_projects.append( + { + "namespace": namespace, + "name": name, + "poc": poc["fedora"], + "epelpoc": poc["epel"], + "watchers": pagure_namespace_to_cc[namespace][name], + } + ) @property def namespace_to_product(self): if self._namespace_to_product is None: self._namespace_to_product = { - p['namespace']: n - for n, p in self.env['products'].items() if 'namespace' in p + p["namespace"]: n + for n, p in self.env["products"].items() + if "namespace" in p } return self._namespace_to_product @@ -771,19 +879,22 @@ class DistgitBugzillaSync: def product_to_branch_regex(self): if self._product_to_branch_regex is None: self._product_to_branch_regex = { - n: re.compile(p['branch_regex']) - for n, p in self.env['products'].items() if 'branch_regex' in p + n: re.compile(p["branch_regex"]) + for n, p in self.env["products"].items() + if "branch_regex" in p } return self._product_to_branch_regex @property def branch_regex_to_product(self): if self._branch_regex_to_product is None: - self._branch_regex_to_product = {n: r for r, n in self.product_to_branch_regex.items()} + self._branch_regex_to_product = { + n: r for r, n in self.product_to_branch_regex.items() + } return self._branch_regex_to_product def _is_retired(self, product, project): - branches = project['branches'] + branches = project["branches"] branch_regex = self.product_to_branch_regex.get(product) if branch_regex: for branch, active in branches: @@ -808,10 +919,12 @@ class DistgitBugzillaSync: The summaries are coming from the primary.xml file of the Rawhide repodata in Koji. """ - branches_url = "/".join([ - self.env['pdc_url'].split('rest_api')[0].rstrip("/"), - 'extras/active_branches.json', - ]) + branches_url = "/".join( + [ + self.env["pdc_url"].split("rest_api")[0].rstrip("/"), + "extras/active_branches.json", + ] + ) if self.env["verbose"]: print("Querying %r for EOL information." % branches_url) @@ -824,32 +937,35 @@ class DistgitBugzillaSync: project["summary"] = summary # Branches - if project['namespace'] not in self.env['pdc_types']: - project['branches'] = [] - project['products'] = [] + if project["namespace"] not in self.env["pdc_types"]: + project["branches"] = [] + project["products"] = [] self.errors["configuration"].append( f'Namespace `{project["namespace"]}` not found in the pdc_type ' f'configuration key, project {project["namespace"]}/{project["name"]} ' - 'ignored' + "ignored" ) continue - pdc_type = self.env['pdc_types'][project['namespace']] - project['branches'] = pdc_branches.get(pdc_type, {}).get(project['name'], []) - if not project['branches']: + pdc_type = self.env["pdc_types"][project["namespace"]] + project["branches"] = pdc_branches.get(pdc_type, {}).get( + project["name"], [] + ) + if not project["branches"]: self.errors["PDC"].append( - f"No PDC branch found for {project['namespace']}/{project['name']}") + f"No PDC branch found for {project['namespace']}/{project['name']}" + ) # Products products = set() - for branch, active in project.get('branches'): + for branch, active in project.get("branches"): for regex, product in self.branch_regex_to_product.items(): if regex.match(branch): products.add(product) break else: - products.add(self.namespace_to_product[project['namespace']]) - project['products'] = list(products) + products.add(self.namespace_to_product[project["namespace"]]) + project["products"] = list(products) products_poc = {} products_retired = {} @@ -857,7 +973,7 @@ class DistgitBugzillaSync: owner = project["poc"] # Check if the project is retired in PDC, and if so set assignee to orphan. if self._is_retired(product, project): - owner = 'orphan' + owner = "orphan" products_retired[product] = True else: products_retired[product] = False @@ -868,12 +984,11 @@ class DistgitBugzillaSync: else: products_poc[product] = owner - project['products_poc'] = products_poc + project["products_poc"] = products_poc project["products_retired"] = products_retired self.pagure_projects[idx] = project - @classmethod def main(cls): """The entrypoint for running the script.""" @@ -895,18 +1010,20 @@ class DistgitBugzillaSync: self.get_cli_arguments() - load_configuration(addl_config_files=self.args.addl_config_files, - addl_email_overrides_files=self.args.addl_email_overrides_files) + load_configuration( + addl_config_files=self.args.addl_config_files, + addl_email_overrides_files=self.args.addl_email_overrides_files, + ) self.config = config - envname = self.config['environment'] + envname = self.config["environment"] if self.args.env: - if self.args.env in self.config['environments']: + if self.args.env in self.config["environments"]: envname = self.args.env else: raise ScriptExecError(f"Invalid environment specified: {self.args.env}") - self.env = self.config['environments'][envname] + self.env = self.config["environments"][envname] if self.args.debug: self.env["verbose"] = True @@ -942,9 +1059,9 @@ class DistgitBugzillaSync: # Initialize the connection to bugzilla bugzilla = BugzillaProxy( - bz_server=self.env['bugzilla']['url'], - username=self.env['bugzilla']['user'], - password=self.env['bugzilla']['password'], + bz_server=self.env["bugzilla"]["url"], + username=self.env["bugzilla"]["user"], + password=self.env["bugzilla"]["password"], config=self.env, pre_cache_users=not self.args.projects or self.args.print_fas_names, ) @@ -962,9 +1079,9 @@ class DistgitBugzillaSync: else: print("Updating bugzilla") - for project in sorted(self.pagure_projects, key=itemgetter('name')): + for project in sorted(self.pagure_projects, key=itemgetter("name")): for product in project["products"]: - if product not in self.env['products']: + if product not in self.env["products"]: if self.env["verbose"]: print(f"Ignoring: {product}/{project['name']}") continue @@ -974,9 +1091,9 @@ class DistgitBugzillaSync: package=project["name"], collection=product, owner=project["products_poc"][product], - description=project['summary'], + description=project["summary"], qacontact=None, - cclist=project['watchers'], + cclist=project["watchers"], print_fas_names=self.args.print_fas_names, retired=project["products_retired"][product], ) @@ -1010,7 +1127,9 @@ class DistgitBugzillaSync: except xmlrpc.client.Error as e: # An error occurred in the xmlrpc call. Shouldn't happen but # we better see what it is - self.errors["bugzilla_raw"].append('%s -- %s' % (project["name"], e.args[-1])) + self.errors["bugzilla_raw"].append( + "%s -- %s" % (project["name"], e.args[-1]) + ) self.errors["bugzilla"].append( f"Failed to update: `{product}/{project['name']}`: " f"\n {e}" @@ -1027,23 +1146,23 @@ class DistgitBugzillaSync: for key in ["configuration", "PDC", "SCM overrides", "bugzilla"]: if self.errors[key]: report.append(key) - report.append(' - {}'.format("\n - ".join(self.errors[key]))) - report.append('') + report.append(" - {}".format("\n - ".join(self.errors[key]))) + report.append("") if self.env["verbose"] or self.env["dryrun"]: print("*" * 80) - print('\n'.join(report)) + print("\n".join(report)) if not self.env["dryrun"]: self.send_email( - self.env['email']['from'], - self.env['email']['notify_admins'], - 'Errors while syncing bugzilla with the PackageDB', - self.env['email']['templates']['admin_notification'].format( - errors='\n'.join(report) - ) + self.env["email"]["from"], + self.env["email"]["notify_admins"], + "Errors while syncing bugzilla with the PackageDB", + self.env["email"]["templates"]["admin_notification"].format( + errors="\n".join(report) + ), ) else: - with open(self.env['data_cache'], 'w') as stream: + with open(self.env["data_cache"], "w") as stream: json.dump({}, stream) if self.env["verbose"]: @@ -1071,5 +1190,5 @@ class DistgitBugzillaSync: print(f" Ran on {delta:.2f} seconds -- ie: {delta/60:.2f} minutes") -if __name__ == '__main__': +if __name__ == "__main__": DistgitBugzillaSync.main() diff --git a/setup.py b/setup.py index 15ae39b..4799f31 100644 --- a/setup.py +++ b/setup.py @@ -4,49 +4,49 @@ from setuptools import setup HERE = os.path.dirname(__file__) -with open(os.path.join(HERE, 'requirements.txt'), 'r') as f: +with open(os.path.join(HERE, "requirements.txt"), "r") as f: INSTALL_REQUIRES = [x.strip() for x in f.readlines()] -with open(os.path.join(HERE, 'test_requirements.txt'), 'r') as f: +with open(os.path.join(HERE, "test_requirements.txt"), "r") as f: TESTS_REQUIRE = [x.strip() for x in f.readlines()] setup( - name='distgit-bugzilla-sync', - version='0.1', - description='script to set default assignee, CC list from component owners', + name="distgit-bugzilla-sync", + version="0.1", + description="script to set default assignee, CC list from component owners", # Possible options are at https://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'Intended Audience :: System Administrators', - 'License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)', - 'Operating System :: POSIX :: Linux', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Topic :: Software Development :: Bug Tracking', + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Software Development :: Bug Tracking", ], - license='GPLv2+', - maintainer='Fedora Infrastructure Team', - maintainer_email='infrastructure@lists.fedoraproject.org', - platforms=['Fedora', 'GNU/Linux'], - url='https://pagure.io/Fedora-Infra/distgit-bugzilla-sync', - keywords='fedora', - packages=['distgit_bugzilla_sync'], + license="GPLv2+", + maintainer="Fedora Infrastructure Team", + maintainer_email="infrastructure@lists.fedoraproject.org", + platforms=["Fedora", "GNU/Linux"], + url="https://pagure.io/Fedora-Infra/distgit-bugzilla-sync", + keywords="fedora", + packages=["distgit_bugzilla_sync"], include_package_data=False, package_data={ - 'distgit_bugzilla_sync': [ - 'default-config-files/configuration.toml', - 'default-config-files/email_overrides.toml', + "distgit_bugzilla_sync": [ + "default-config-files/configuration.toml", + "default-config-files/email_overrides.toml", ], }, zip_safe=False, install_requires=INSTALL_REQUIRES, tests_require=TESTS_REQUIRE, entry_points={ - 'console_scripts': [ - 'distgit-bugzilla-sync = distgit_bugzilla_sync.script:DistgitBugzillaSync.main', + "console_scripts": [ + "distgit-bugzilla-sync = distgit_bugzilla_sync.script:DistgitBugzillaSync.main", ], }, )