Modify owner-sync-pagure to accept multiple tags at once and use multithreading

This commit is contained in:
Matt Prahl 2017-08-11 12:30:56 +00:00 committed by Ralph Bean
parent bf97b87714
commit 44b4d88d35

View file

@ -16,9 +16,13 @@ This is heavily based on "owner-sync-pkgdb.j2" which was introduced in commit
from __future__ import print_function
import sys
import os
from time import sleep
import argparse
import ConfigParser
from urlparse import urljoin
import multiprocessing.pool
from math import ceil
from functools import partial
import requests
import koji
@ -158,48 +162,99 @@ def get_repo_name_and_arches(tag, version):
return repo_name, arches
def get_pagure_projects(session, namespace):
    """
    Generator over every non-fork project Pagure reports for a namespace.

    Starts at page 1 (50 projects per page) and keeps following the
    ``pagination.next`` URL from each response until it is empty. If a
    request comes back unsuccessful, a message is printed to stderr and the
    generator simply stops; no exception is raised.

    :param session: object with a requests-style ``get`` method
    :param namespace: string of the namespace to query for projects
    :return: yields each entry of the ``projects`` list of every page
    """
    query = 'api/0/projects?namespace={0}'.format(namespace)
    next_url = (urljoin(PAGURE_URL, query)
                + "&page=1&per_page=50&fork=false&short=true")
    while next_url:
        rv = session.get(next_url, verify=VERIFY, timeout=120)
        # A requests response is falsy when the status code signals an error
        if not rv:
            print("Failed to talk to %r %r." % (
                rv.request.url, rv), file=sys.stderr)
            return
        payload = rv.json()
        for entry in payload['projects']:
            yield entry
        # Falsy on the last page, which ends the loop
        next_url = payload['pagination']['next']
# NOTE(review): this span comes from a rendered diff without +/- markers or
# indentation. It appears to interleave lines of get_project_ownership (a
# generator over the projects of a namespace) with lines of
# get_pagure_project_name_and_branch (a per-project lookup) -- confirm
# against the real file before editing either function.
def get_project_ownership(tag, namespace):
projects = {}
def get_pagure_project_name_and_branch(namespace, project_name):
"""
Gets the branches on a project. This function is used for mapping.
:param namespace: string of the namespace the project is in
:param project: string of the project
:return: a tuple containing the string of the project and a list of
branches
"""
# NOTE(review): the docstring's ":param project:" describes the argument that
# the signature calls project_name.
# retry_session() is defined outside this span; presumably it returns a
# requests session configured with retries -- TODO confirm.
session = retry_session()
for project in get_pagure_projects(session, namespace=namespace):
# Check if this project has the branch we are interested in
project_branches_url = '{0}api/0/{1}/{2}/git/branches'.format(
PAGURE_URL, namespace, project['name'])
project_branches_rv = session.get(
project_branches_url, verify=VERIFY, timeout=60)
# Same request as above, but the URL is built from the project_name argument
# rather than from a project dict.
project_branches_url = '{0}api/0/{1}/{2}/git/branches'.format(
PAGURE_URL, namespace, project_name)
project_branches_rv = session.get(
project_branches_url, verify=VERIFY, timeout=60)
# If the project's branches can't be reported, let's skip the project
# for now.
if not project_branches_rv.ok:
continue
# If the project's branches can't be reported, just return no branches and
# it will be skipped later on
if not project_branches_rv.ok:
return project_name, []
project_branches_rv_json = project_branches_rv.json()
# The tag and branch names are the same for "old-style" branches
if tag in project_branches_rv_json['branches'] or tag == 'f' + RAWHIDE:
yield project['name']
# This is a special project, not in dist-git, but which needs to be in the package list.
yield 'module-build-macros'
# Pair the project name with the 'branches' value of the JSON response so the
# results of a map over project names can be turned into a dictionary.
return project_name, project_branches_rv.json()['branches']
def set_koji_ownership(packages, arches):
def get_pagure_project_names_from_page(namespace, page):
    """
    Gets the names of all the Pagure projects on a page. This function is to be
    used for mapping.

    :param namespace: string of the namespace to query for projects
    :param page: int of the page to query at (100 projects per page)
    :return: set of project names on the page
    :raises RuntimeError: if Pagure answers with an unsuccessful response
    """
    url = urljoin(PAGURE_URL, 'api/0/projects?namespace={0}'.format(namespace))
    url = url + '&page={0}&per_page=100&fork=false&short=true'.format(page)
    session = retry_session()
    response = session.get(url, verify=VERIFY, timeout=120)
    # A requests response is falsy when the status code signals an error
    if not response:
        message = "Failed to talk to %r %r." % (response.request.url, response)
        print(message, file=sys.stderr)
        # This function runs inside thread-pool workers. sys.exit() there
        # would only raise SystemExit in the worker thread, which the pool
        # does not hand back to the caller of map(). An ordinary exception
        # is re-raised by map() in the calling thread, so the failure
        # actually stops the script.
        raise RuntimeError(message)
    return set(project['name'] for project in response.json()['projects'])
def get_pagure_project_branches(namespace):
    """
    Gets all the branches of all the Pagure projects in the desired namespace

    A one-project page is requested first, only to read ``total_projects``.
    The project names are then fetched page by page (100 per page) and the
    branches project by project, both spread over a pool of four threads.
    Exits the script with status 1 if that first request fails.

    :param namespace: string of the namespace to query for projects
    :return: dictionary in the format of {project_name: [branch_one...]}
    """
    first_page_url_path = ('api/0/projects?namespace={0}&fork=false&short=true'
                           '&page=1&per_page=1'.format(namespace))
    first_page_url = urljoin(PAGURE_URL, first_page_url_path)
    session = retry_session()
    first_page_rv = session.get(first_page_url, verify=VERIFY, timeout=120)

    if not first_page_rv:
        print("Failed to talk to %r %r." % (
            first_page_rv.request.url, first_page_rv), file=sys.stderr)
        sys.exit(1)

    total_projects = first_page_rv.json()['total_projects']
    # 100.0 forces float division so a partially filled last page is counted
    num_pages = int(ceil(total_projects / 100.0))

    pool = multiprocessing.pool.ThreadPool(4)
    try:
        # Since we are going to multi-thread, we need to make a partial
        # function call so that all the function needs is an iterable to run
        partial_get_pagure_projects_page = partial(
            get_pagure_project_names_from_page, namespace)
        project_names_sets = pool.map(partial_get_pagure_projects_page,
                                      range(1, num_pages + 1))

        # No pages means no projects; set.union() below needs at least one set
        if not project_names_sets:
            return {}

        # Combine all the project name sets
        project_names = set.union(*project_names_sets)
        # Hopefully save some RAM
        del project_names_sets

        partial_get_pagure_project_name_and_branch = partial(
            get_pagure_project_name_and_branch, namespace)
        # Get a list of tuples in the form of (project, [branch...]), then
        # convert that to a dictionary
        return dict(pool.map(
            partial_get_pagure_project_name_and_branch, project_names))
    finally:
        # Shut the pool down on every exit path, including the early return
        # and a failing map(). By this point map() has either returned or
        # raised, so no result that is still needed gets discarded.
        pool.terminate()
        pool.join()
def set_koji_ownership(tag, packages, arches):
koji_options = get_options()
for arch in arches:
@ -244,14 +299,42 @@ def set_koji_ownership(packages, arches):
# NOTE(review): this span comes from a rendered diff without +/- markers or
# indentation. It appears to interleave the single-tag sys.argv handling with
# the argparse-based handling that accepts several tags -- confirm against
# the real file before editing.
if __name__ == '__main__':
try:
tag = sys.argv[1]
except IndexError:
print('Error: no tag specified', file=sys.stderr)
usage()
# argparse variant: one or more positional tags (nargs='+') end up in `tags`.
# NOTE(review): the description string talks about integers, while the only
# argument is a list of tags; it looks like leftover example text.
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('tag', nargs='+',
help='tag to update the package list on')
args = parser.parse_args()
tags = args.tag
# Single-tag flow: resolve the tag, collect its packages, push them to Koji.
namespace, version = get_namespace_and_version_from_tag(tag)
repo_name, arches = get_repo_name_and_arches(tag, version)
pkgs = get_project_ownership(tag, namespace)
set_koji_ownership(pkgs, arches)
# Get all the info about the tags we are interested in. Namespaces are
# collected separately so that each one is queried only once, however many
# tags share it.
unique_namespaces = set()
tag_info = {}
for tag in tags:
    namespace, version = get_namespace_and_version_from_tag(tag)
    repo_name, arches = get_repo_name_and_arches(tag, version)
    tag_info[tag] = {
        'namespace': namespace,
        'version': version,
        'repo_name': repo_name,
        'arches': arches,
    }
    unique_namespaces.add(namespace)

# Get all the project to branch mappings for every namespace
namespace_to_projects = {}
for namespace in unique_namespaces:
    namespace_to_projects[namespace] = get_pagure_project_branches(namespace)

for tag, info in tag_info.items():
    namespace = info['namespace']
    # The tag and branch names are the same for "old-style" branches. The
    # rawhide tag ('f' + RAWHIDE) takes every project regardless of branches.
    pkgs = [
        pkg
        for pkg, branches in namespace_to_projects[namespace].items()
        if tag in branches or tag == ('f' + RAWHIDE)
    ]
    # This is a special project, not in dist-git, but which needs to be in
    # the package list.
    if namespace == 'rpms':
        pkgs.append('module-build-macros')
    # The arches belong to this tag's entry. tag_info itself is keyed by tag
    # name, so looking up 'arches' on it directly would raise KeyError.
    set_koji_ownership(tag, pkgs, info['arches'])