Make updating gitolite and pagure multi-threaded

This speeds up the process a little bit, but not enough for what we want;
we'll still need to optimize this.
This commit is contained in:
Pierre-Yves Chibon 2017-02-24 19:05:55 +01:00
parent 94ec845fc7
commit ba5f8484a7

View file

@ -7,10 +7,13 @@
import copy import copy
import grp import grp
import itertools
import os import os
import sys import sys
import json import json
from multiprocessing import Pool, Manager
import requests import requests
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
@ -47,19 +50,19 @@ def get_user_info(username):
return user return user
def create_user_obj(username): def create_user_obj(session, username):
''' Creates a sqlalchemy user object for pagure db ''' ''' Creates a sqlalchemy user object for pagure db '''
try: try:
userinfo = get_user_info(username) userinfo = get_user_info(username)
user = pagure.lib.set_up_user( user = pagure.lib.set_up_user(
session=SESSION, session=session,
username=username, username=username,
fullname=userinfo['fullname'], fullname=userinfo['fullname'],
default_email=userinfo['default_email'] default_email=userinfo['default_email']
) )
SESSION.commit() session.commit()
except SQLAlchemyError: except SQLAlchemyError:
SESSION.rollback() session.rollback()
if TESTING: if TESTING:
print 'Creating user failed' print 'Creating user failed'
@ -127,25 +130,25 @@ def create_groups_in_db(groups):
print 'Adding a user to group failed' print 'Adding a user to group failed'
def update_owners_to_db(namespace, pkg, owners): def update_owners_to_db(session, namespace, pkg, owners):
''' Adds owners to pagure db ''' ''' Adds owners to pagure db '''
pkg_obj = pagure.lib.get_project(
session, name=pkg, namespace=namespace)
for owner in owners: for owner in owners:
# check if the owners are present in the db # check if the owners are present in the db
# if not create them # if not create them
owner_obj = pagure.lib.search_user(SESSION, username=owner) owner_obj = pagure.lib.search_user(session, username=owner)
if not owner_obj: if not owner_obj:
owner_obj = create_user_obj(owner) owner_obj = create_user_obj(session, owner)
pkg_obj = pagure.lib.get_project(
SESSION, name=pkg, namespace=namespace)
# this flag is for avoiding unnecessary db queries # this flag is for avoiding unnecessary db queries
flag = True created = False
if not pkg_obj: if not pkg_obj:
try: try:
pagure.lib.new_project( pagure.lib.new_project(
session=SESSION, session=session,
user=owner, user=owner,
namespace=namespace, namespace=namespace,
name=pkg, name=pkg,
@ -157,10 +160,10 @@ def update_owners_to_db(namespace, pkg, owners):
requestfolder=pagure.APP.config['REQUESTS_FOLDER'], requestfolder=pagure.APP.config['REQUESTS_FOLDER'],
ignore_existing_repo=True, ignore_existing_repo=True,
) )
SESSION.commit() session.commit()
flag = False created = True
except SQLAlchemyError as err: except SQLAlchemyError as err:
SESSION.rollback() session.rollback()
if TESTING: if TESTING:
print "Couldn't create project - %s" % pkg print "Couldn't create project - %s" % pkg
print "ERROR: %s" % err print "ERROR: %s" % err
@ -171,9 +174,9 @@ def update_owners_to_db(namespace, pkg, owners):
# so now the pkg surely exists, make the owner, # so now the pkg surely exists, make the owner,
# the owner of the repo if he is not # the owner of the repo if he is not
if not flag: if created:
pkg_obj = pagure.lib.get_project( pkg_obj = pagure.lib.get_project(
session=SESSION, session=session,
name=pkg, name=pkg,
namespace=namespace namespace=namespace
) )
@ -182,12 +185,12 @@ def update_owners_to_db(namespace, pkg, owners):
if owner_obj not in pkg_obj.users and owner_obj is not pkg_obj.user: if owner_obj not in pkg_obj.users and owner_obj is not pkg_obj.user:
try: try:
pagure.lib.add_user_to_project( pagure.lib.add_user_to_project(
session=SESSION, session=session,
project=pkg_obj, project=pkg_obj,
new_user=owner_obj.user, new_user=owner_obj.user,
user=pkg_obj.user.user, user=pkg_obj.user.user,
) )
SESSION.commit() session.commit()
except SQLAlchemyError as err: except SQLAlchemyError as err:
SESSION.rollback() SESSION.rollback()
if TESTING: if TESTING:
@ -195,23 +198,23 @@ def update_owners_to_db(namespace, pkg, owners):
print "ERROR: %s" % err print "ERROR: %s" % err
def update_groups_to_db(namespace, pkg, pkg_groups): def update_groups_to_db(session, namespace, pkg, pkg_groups):
''' Adds groups to projects in pagure db ''' ''' Adds groups to projects in pagure db '''
pkg_obj = pagure.lib.get_project(
session, name=pkg, namespace=namespace)
for group in pkg_groups: for group in pkg_groups:
# we have already created all the groups # we have already created all the groups
group_obj = pagure.lib.search_groups(SESSION, group_name=group) group_obj = pagure.lib.search_groups(session, group_name=group)
pkg_obj = pagure.lib.get_project(
SESSION, name=pkg, namespace=namespace)
# in case when there are only groups with commit access and no # in case when there are only groups with commit access and no
# people the flag is for cutting out db queries later # people the flag is for cutting out db queries later
flag = True created = False
if not pkg_obj: if not pkg_obj:
try: try:
pagure.lib.new_project( pagure.lib.new_project(
session=SESSION, session=session,
user=group_obj.creator.user, user=group_obj.creator.user,
namespace=namespace, namespace=namespace,
name=pkg, name=pkg,
@ -223,10 +226,10 @@ def update_groups_to_db(namespace, pkg, pkg_groups):
requestfolder=pagure.APP.config['REQUESTS_FOLDER'], requestfolder=pagure.APP.config['REQUESTS_FOLDER'],
ignore_existing_repo=True, ignore_existing_repo=True,
) )
SESSION.commit() session.commit()
flag = False created = True
except SQLAlchemyError as err: except SQLAlchemyError as err:
SESSION.rollback() session.rollback()
if TESTING: if TESTING:
print "Couldn't create project" print "Couldn't create project"
print "ERROR: %s" % err print "ERROR: %s" % err
@ -237,9 +240,9 @@ def update_groups_to_db(namespace, pkg, pkg_groups):
# for the case when the new project was just created # for the case when the new project was just created
# by the above call # by the above call
if not flag: if created:
pkg_obj = pagure.lib.get_project( pkg_obj = pagure.lib.get_project(
SESSION, name=pkg, namespace=namespace) session, name=pkg, namespace=namespace)
# if the group was initially empty, it was not # if the group was initially empty, it was not
# created in the db # created in the db
@ -251,15 +254,15 @@ def update_groups_to_db(namespace, pkg, pkg_groups):
if group_obj not in pkg_obj.groups: if group_obj not in pkg_obj.groups:
try: try:
pagure.lib.add_group_to_project( pagure.lib.add_group_to_project(
session=SESSION, session=session,
project=pkg_obj, project=pkg_obj,
new_group=group, new_group=group,
user=pkg_obj.user.user, user=pkg_obj.user.user,
#access='admin' access='admin'
) )
SESSION.commit() session.commit()
except SQLAlchemyError as err: except SQLAlchemyError as err:
SESSION.rollback() session.rollback()
if TESTING: if TESTING:
print "Adding a group to a project failed" print "Adding a group to a project failed"
print "ERROR: %s" % err print "ERROR: %s" % err
@ -267,12 +270,73 @@ def update_groups_to_db(namespace, pkg, pkg_groups):
def add_fork_to_gitolite(): def add_fork_to_gitolite():
''' Creates a sqlalchemy user object for pagure db ''' ''' Creates a sqlalchemy user object for pagure db '''
for fork in pagure.lib.search_projects(session=SESSION, forks=True): for fork in pagure.lib.search_projects(session=SESSION, fork=True):
print '' print ''
print 'repo %s' % (fork.fullname) print 'repo %s' % (fork.fullname)
if masters: print ' RWC = %s' % fork.user.username
print ' RWC = %s' % ' '.join(
[user.username for user in fork.committers])
def process_pkg(arg):
""" Process the given package, adjust pagure for it and queue all the
ACLs so we can send them to gitolite
"""
# `arg` is one 3-item sequence (package name, ACL mapping, queue): the
# function is driven through Pool.map, which hands each task a single
# argument, so the three values travel packed together.
pkg, acls, myq = arg
# Open a dedicated DB session for this call rather than using the
# module-level SESSION; it is closed again just before the result is queued.
session = pagure.lib.create_session(pagure.APP.config['DB_URL'])
# NOTE(review): branchAcls is filled in below but is not part of the data
# put on the queue at the end of this function.
branchAcls = {} # Check whether we need to set separate per branch acls
buffer = [] # Buffer the output per package
masters = [] # Folks that have commit to master
writers = [] # Anybody that has write access
# Examine each branch in the package
# NOTE(review): keys() followed by an in-place .sort() requires keys() to
# return a list (Python 2 behaviour).
branches = acls[pkg].keys()
branches.sort()
for branch in branches:
# Branches without an entry in ACTIVE (defined outside this function) are
# skipped entirely.
if branch not in ACTIVE.keys():
continue
if 'packager' in acls[pkg][branch]['commit']['groups']:
# If the packager group is defined, everyone has access
buffer.append(' RWC %s = @all' % (ACTIVE[branch]))
branchAcls.setdefault('@all', []).append(
(pkg, ACTIVE[branch])
)
if branch == 'master':
masters.append('@all')
if '@all' not in writers:
writers.append('@all')
else:
# Extract the owners
committers = []
owners = acls[pkg][branch]['commit']['people']
# NOTE(review): this sorts the list stored inside `acls` in place.
owners.sort()
for owner in owners:
committers.append(owner)
# Groups are written with a leading '@' in the committers list.
for group in acls[pkg][branch]['commit']['groups']:
committers.append('@%s' % group)
if branch == 'master':
masters.extend(committers)
pkg_groups = acls[pkg][branch]['commit']['groups']
# NOTE(review): `namespace` is neither a parameter nor assigned in this
# function; presumably it is a module-level global set by the __main__
# section -- confirm it is visible inside the worker processes.
update_owners_to_db(session, namespace, pkg, owners)
update_groups_to_db(session, namespace, pkg, pkg_groups)
# add all the committers to the top writers list
for committer in committers:
if committer not in writers:
writers.append(committer)
# Print the committers to the acl for this package-branch
# `committers` is rebound here from a list to a space-joined string.
committers = ' '.join(committers)
buffer.append(
' RWC %s = %s' % (ACTIVE[branch], committers))
branchAcls.setdefault(committers, []).append(
(pkg, ACTIVE[branch])
)
# NOTE(review): if anything above raises, this close() is never reached.
session.close()
# Hand the per-package result back through the queue as
# [pkg, buffer, writers, masters].
data = [pkg, buffer, writers, masters]
myq.put(data)
# NOTE(review): task_done() is normally called by the consumer after get();
# here the producer calls it right after put(). The main section later calls
# q.join() -- confirm this pairing is intended.
myq.task_done()
if __name__ == '__main__': if __name__ == '__main__':
@ -374,58 +438,17 @@ if __name__ == '__main__':
acls = data[namespace] acls = data[namespace]
pkglist = sorted(data[namespace].keys()) pkglist = sorted(data[namespace].keys())
for pkg in pkglist: m = Manager()
q = m.Queue()
branchAcls = {} # Check whether we need to set separate per branch acls p = Pool(5)
buffer = [] # Buffer the output per package p.map(process_pkg, itertools.product(pkglist, [acls], [q]))
masters = [] # Folks that have commit to master p.close()
writers = [] # Anybody that has write access p.join()
# Examine each branch in the package
branches = acls[pkg].keys()
branches.sort()
for branch in branches:
if branch not in ACTIVE.keys():
continue
if 'packager' in acls[pkg][branch]['commit']['groups']:
# If the packager group is defined, everyone has access
buffer.append(' RWC %s = @all' % (ACTIVE[branch]))
branchAcls.setdefault('@all', []).append(
(pkg, ACTIVE[branch])
)
if branch == 'master':
masters.append('@all')
if '@all' not in writers:
writers.append('@all')
else:
# Extract the owners
committers = []
owners = acls[pkg][branch]['commit']['people']
owners.sort()
for owner in owners:
committers.append(owner)
for group in acls[pkg][branch]['commit']['groups']:
committers.append('@%s' % group)
if branch == 'master':
masters.extend(committers)
pkg_groups = acls[pkg][branch]['commit']['groups']
update_owners_to_db(namespace, pkg, owners)
update_groups_to_db(namespace, pkg, pkg_groups)
# add all the committers to the top writers list
for committer in committers:
if committer not in writers:
writers.append(committer)
# Print the committers to the acl for this package-branch
committers = ' '.join(committers)
buffer.append(
' RWC %s = %s' % (ACTIVE[branch], committers))
branchAcls.setdefault(committers, []).append(
(pkg, ACTIVE[branch])
)
#for pkg in pkglist:
#process_pkg([pkg, acls, q])
while q.qsize():
pkg, buffer, writers, masters = q.get()
print '' print ''
print 'repo %s/%s' % (namespace, pkg) print 'repo %s/%s' % (namespace, pkg)
print '\n'.join(buffer) print '\n'.join(buffer)
@ -435,6 +458,8 @@ if __name__ == '__main__':
if masters: if masters:
print ' RWC = %s' % ' '.join(masters) print ' RWC = %s' % ' '.join(masters)
q.join()
add_fork_to_gitolite() add_fork_to_gitolite()
sys.exit(0) sys.exit(0)