Changes: Updates splitter.py to the version in the upstream modules git repository.

Removes the two-stage process for building repositories.
Sets all the files to the same timestamp.

Signed-off-by: Stephen Smoogen <ssmoogen@redhat.com>
Stephen Smoogen, 2022-05-27 16:23:40 -04:00 (committed by smooge)
parent 7c77f430dd
commit 824654b955
3 changed files with 290 additions and 218 deletions
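
Taken together, the scripts now copy packages into the split trees instead of hardlinking them, stamp every file in each tree with the same mtime, and point the latest symlink straight at the day's compose instead of staging and reposyncing a second copy. A minimal sketch of the per-tree timestamp step, assuming the current directory is one of the split targets such as CS-8-001:

    touch timestamp
    find . -type f -print | xargs touch -r timestamp   # give every file the same mtime
    createrepo_c . &> /dev/null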

View file

@@ -41,22 +41,22 @@ for ARCH in ${ARCHES}; do
fi
# Begin splitting the various packages into their subtrees
${BINDIR}/splitter.py --action hardlink --target CS-8-001 ${HOMEDIR}/BaseOS/${ARCH}/os/ --only-defaults &> /dev/null
${BINDIR}/splitter.py --action copy --target CS-8-001 ${HOMEDIR}/BaseOS/${ARCH}/os/ --only-defaults &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} baseos failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target CS-8-002 ${HOMEDIR}/AppStream/${ARCH}/os/ --only-defaults &> /dev/null
${BINDIR}/splitter.py --action copy --target CS-8-002 ${HOMEDIR}/AppStream/${ARCH}/os/ --only-defaults &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} appstream failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target CS-8-003 ${HOMEDIR}/PowerTools/${ARCH}/os/ &> /dev/null
${BINDIR}/splitter.py --action copy --target CS-8-003 ${HOMEDIR}/PowerTools/${ARCH}/os/ &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} codeready failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target CS-8-004 ${HOMEDIR}/Devel/${ARCH}/os/ &> /dev/null
${BINDIR}/splitter.py --action copy --target CS-8-004 ${HOMEDIR}/Devel/${ARCH}/os/ &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} devel failed"
exit
@@ -71,6 +71,8 @@ for ARCH in ${ARCHES}; do
# Go into the main tree
pushd CS-8-001
touch timestamp
find . -type f -print | xargs touch -r timestamp
# Mergerepo didn't work so lets just createrepo in the top directory.
createrepo_c . &> /dev/null
popd
@@ -82,35 +84,26 @@ done
## Set up the builds so they are pointing to the last working version
cd ${KOJIDIR}
if [[ -e staged ]]; then
if [[ -h staged ]]; then
rm -f staged
if [[ -e latest ]]; then
if [[ -h latest ]]; then
rm -f latest
else
echo "Unable to remove staged. it is not a symbolic link"
echo "Unable to remove staged. it is not a symbolic link. Trying to move to latest_${DATE}."
if [[ -d latest_${DATE} ]]; then
echo "latest_${DATE} exists. Exiting"
exit
else
mv latest latest_${DATE}
fi
fi
else
echo "No staged link found"
echo "No latest link found"
fi
echo "Linking ${DATE} to staged"
ln -s ${DATE} staged
echo "Linking ${DATE} to latest"
ln -s ${DATE} latest
for ARCH in ${ARCHES}; do
if [[ -d latest/ ]]; then
pushd latest/
else
mkdir latest/
pushd latest/
fi
mkdir -p ${ARCH}
dnf --disablerepo=\* --enablerepo=CS-8-001 --repofrompath=CS-8-001,https://infrastructure.fedoraproject.org/repo/centos/stream8-kojitarget/staged/${ARCH}/CS-8-001/ reposync -a ${ARCH} -a noarch -p ${ARCH} --newest --delete &> /dev/null
if [[ $? -eq 0 ]]; then
cd ${ARCH}/CS-8-001
createrepo_c . &> /dev/null
else
echo "Unable to run createrepo on latest/${ARCH}"
fi
popd
done
## Wish there was a clean way to tell koji to figure out the new repos
## from batcave.
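
Condensed, the latest handling that replaces the old staged link comes down to the following, assuming KOJIDIR and DATE are set earlier in the script:

    cd ${KOJIDIR}
    if [[ -e latest ]]; then
        if [[ -h latest ]]; then
            rm -f latest                  # normal case: latest is a symlink, drop it
        elif [[ -d latest_${DATE} ]]; then
            echo "latest_${DATE} exists. Exiting"
            exit
        else
            mv latest latest_${DATE}      # keep a real directory out of the way
        fi
    fi
    ln -s ${DATE} latest                  # point latest at today's compose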

View file

@@ -21,15 +21,7 @@ mkdir -p ${DATEDIR}
fi
##
## Remove the old latest
rm -rf ${HOMEDIR}/koji/latest_old/
if [ $? -ne 0 ]; then
echo "removal of old latest failed"
exit
fi
##
## Go through each architecture and
## Go through each architecture and split out the trees.
##
for ARCH in ${ARCHES}; do
# The archdir is where we daily download updates for rhel8
@@ -52,17 +44,17 @@ for ARCH in ${ARCHES}; do
fi
# Begin splitting the various packages into their subtrees
${BINDIR}/splitter.py --action hardlink --target RHEL-8-001 ${ARCHDIR}/rhel-8-for-${ARCH}-baseos-rpms/ --only-defaults &> /dev/null
${BINDIR}/splitter.py --action copy --target RHEL-8-001 ${ARCHDIR}/rhel-8-for-${ARCH}-baseos-rpms/ --only-defaults &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} baseos failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target RHEL-8-002 ${ARCHDIR}/rhel-8-for-${ARCH}-appstream-rpms/ --only-defaults &> /dev/null
${BINDIR}/splitter.py --action copy --target RHEL-8-002 ${ARCHDIR}/rhel-8-for-${ARCH}-appstream-rpms/ --only-defaults &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} appstream failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target RHEL-8-003 ${ARCHDIR}/codeready-builder-for-rhel-8-${ARCH}-rpms/ &> /dev/null
${BINDIR}/splitter.py --action copy --target RHEL-8-003 ${ARCHDIR}/codeready-builder-for-rhel-8-${ARCH}-rpms/ &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} codeready failed"
exit
@@ -76,6 +68,8 @@ for ARCH in ${ARCHES}; do
# Go into the main tree
pushd RHEL-8-001
touch timestamp
find . -type f -print | xargs touch -r timestamp
# Mergerepo didn't work so lets just createrepo in the top directory.
createrepo_c . &> /dev/null
popd
@@ -87,50 +81,25 @@ done
## Set up the builds so they are pointing to the last working version
cd ${HOMEDIR}/koji/
if [[ -e staged ]]; then
if [[ -h staged ]]; then
rm -f staged
if [[ -e latest ]]; then
if [[ -h latest ]]; then
rm -f latest
else
echo "Unable to remove staged. it is not a symbolic link"
echo "Unable to remove staged. it is not a symbolic link. Trying to move to latest_${DATE}."
if [[ -d latest_${DATE} ]]; then
echo "latest_${DATE} exists. Exiting"
exit
else
mv latest latest_${DATE}
fi
fi
else
echo "No staged link found"
echo "No latest link found"
fi
####
#### The following is overly complicated and makes thinking and
#### debugging hard. This needs to be fixed.
echo "Linking ${DATE} to latest"
ln -s ${DATE} latest
## The goal here is to take the staged code, and make a new repo with
## just the latest amount of rpms in it. We also want to try and cut
## the race condition down where koji sees one 'RHEL-8-001' with X.Y.Z
## rpms and then sees it with A.B.C or some mix.
# FIXME: Do we really need to make this linked staged?
echo "Linking ${DATE} to staged"
ln -s ${DATE} staged
NEW_LATEST=latest-${DATE}
mkdir -p ${NEW_LATEST}
# Go through each architecture
for ARCH in ${ARCHES}; do
# The following is overly complicated and needs to be cleaner.
pushd ${NEW_LATEST}
mkdir -p ${NEW_LATEST}/${ARCH}
dnf --disablerepo=\* --enablerepo=RHEL-8-001 --repofrompath=RHEL-8-001,https://infrastructure.fedoraproject.org/repo/rhel/rhel8/koji/staged/${ARCH}/RHEL-8-001/ reposync -a ${ARCH} -a noarch -p ${ARCH} --newest --delete &> /dev/null
if [[ $? -eq 0 ]]; then
cd ${ARCH}/RHEL-8-001
createrepo_c . &> /dev/null
else
echo "Unable to run createrepo on latest/${ARCH}"
fi
popd
done
## RACE CONDITION TIME!!!!
mv latest latest_old
mv ${NEW_LATEST} latest
## Wish there was a clean way to tell koji to figure out the new repos
## from batcave.

View file
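
Both compose scripts drive splitter.py purely through its command line, so the options added below can be exercised directly. A hypothetical invocation against a BaseOS tree (paths are placeholders):

    ${BINDIR}/splitter.py --action copy --target CS-8-001 \
        --only-defaults --ignore-missing-default-deps --debug \
        ${HOMEDIR}/BaseOS/x86_64/os/

Here --action copy is what the scripts now use instead of hardlink, --only-defaults restricts the output to default module streams, --ignore-missing-default-deps keeps default streams whose dependencies cannot be resolved within the repository instead of skipping them, and --debug enables the new debug logging.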

@@ -13,21 +13,22 @@ import tempfile
import os
import subprocess
import sys
import time
import logging
# Look for a specific version of modulemd. The 1.x series does not
# have the tools we need.
try:
gi.require_version('Modulemd', '2.0')
from gi.repository import Modulemd
except:
print("We require newer vesions of modulemd than installed..")
sys.exit(0)
from gi.repository import Modulemd as mmd
except ValueError:
print("libmodulemd 2.0 is not installed..")
sys.exit(1)
mmd = Modulemd
# We only want to load the module metadata once. It can be reused as often as required
_idx = None
# This code is from Stephen Gallagher to make my other caveman code
# less icky.
def _get_latest_streams (mymod, stream):
def _get_latest_streams(mymod, stream):
"""
Routine takes modulemd object and a stream name.
Finds the latest stream from that and returns that as a stream
@@ -39,6 +40,7 @@ def _get_latest_streams (mymod, stream):
return latest_streams
def _get_repoinfo(directory):
"""
A function which goes into the given directory and sets up the
@@ -55,6 +57,46 @@ def _get_repoinfo(directory):
r = h.perform()
return r.getinfo(librepo.LRR_YUM_REPO)
def _get_modulemd(directory=None, repo_info=None):
"""
Retrieve the module metadata from this repository.
:param directory: The path to the repository. Must contain repodata/repomd.xml and modules.yaml.
:param repo_info: An already-acquired repo_info structure
:return: A Modulemd.ModulemdIndex object containing the module metadata from this repository.
"""
# Return the cached value
global _idx
if _idx:
return _idx
# If we don't have a cached value, we need either directory or repo_info
assert directory or repo_info
if directory:
directory = os.path.abspath(directory)
repo_info = _get_repoinfo(directory)
if 'modules' not in repo_info:
return None
_idx = mmd.ModuleIndex.new()
with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf:
mmdcts = gzf.read().decode('utf-8')
res, failures = _idx.update_from_string(mmdcts, True)
if len(failures) != 0:
raise Exception("YAML FAILURE: FAILURES: %s" % failures)
if not res:
raise Exception("YAML FAILURE: res != True")
# Ensure that every stream in the index is using v2
_idx.upgrade_streams(mmd.ModuleStreamVersionEnum.TWO)
return _idx
def _get_hawkey_sack(repo_info):
"""
A function to pull in the repository sack from hawkey.
@@ -70,6 +112,7 @@ def _get_hawkey_sack(repo_info):
return primary_sack
def _get_filelist(package_sack):
"""
Determine the file locations of all packages in the sack. Use the
@@ -78,10 +121,12 @@ def _get_filelist(package_sack):
"""
pkg_list = {}
for pkg in hawkey.Query(package_sack):
nevr="%s-%s:%s-%s.%s"% (pkg.name,pkg.epoch,pkg.version,pkg.release,pkg.arch)
nevr = "%s-%s:%s-%s.%s" % (pkg.name, pkg.epoch,
pkg.version, pkg.release, pkg.arch)
pkg_list[nevr] = pkg.location
return pkg_list
def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
"""
Simple routine to go through a repo, and figure out which packages
@@ -98,28 +143,14 @@ def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
pkgs.add(pkg.location)
return pkgs
def _parse_repository_modular(repo_info,package_sack):
def _parse_repository_modular(repo_info, package_sack):
"""
Returns a dictionary of packages indexed by the modules they are
contained in.
"""
cts = {}
idx = mmd.ModuleIndex()
myfile = repo_info['modules']
if myfile.endswith(".gz"):
openfunc=gzip.GzipFile
elif myfile.endswith(".xz"):
openfunc=lzma.LZMAFile
else:
print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
sys.exit(1)
with openfunc(filename=myfile, mode='r') as gzf:
mmdcts = gzf.read().decode('utf-8')
res, failures = idx.update_from_string(mmdcts, True)
if len(failures) != 0:
raise Exception("YAML FAILURE: FAILURES: %s" % failures)
if not res:
raise Exception("YAML FAILURE: res != True")
idx = _get_modulemd(repo_info=repo_info)
pkgs_list = _get_filelist(package_sack)
idx.upgrade_streams(2)
@@ -151,6 +182,7 @@ def _get_modular_pkgset(mod):
return list(pkgs)
def _perform_action(src, dst, action):
"""
Performs either a copy, hardlink or symlink of the file src to the
@@ -169,6 +201,7 @@ def _perform_action(src, dst, action):
elif action == 'symlink':
os.symlink(src, dst)
def validate_filenames(directory, repoinfo):
"""
Take a directory and repository information. Test each file in
@@ -185,108 +218,156 @@ def validate_filenames(directory, repoinfo):
return isok
def get_default_modules(directory):
def _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps):
if stream.get_NSVCA() in all_deps:
# We've already encountered this NSVCA, so don't go through it again
logging.debug('Already included {}'.format(stream.get_NSVCA()))
return
# Store this NSVCA/NS pair
local_deps = all_deps
local_deps.add(stream.get_NSVCA())
logging.debug("Recursive deps: {}".format(stream.get_NSVCA()))
# Loop through the dependencies for this stream
deps = stream.get_dependencies()
# At least one of the dependency array entries must exist in the repo
found_dep = False
for dep in deps:
# Within an array entry, all of the modules must be present in the
# index
found_all_modules = True
for modname in dep.get_runtime_modules():
# Ignore "platform" because it's special
if modname == "platform":
logging.debug('Skipping platform')
continue
logging.debug('Processing dependency on module {}'.format(modname))
mod = idx.get_module(modname)
if not mod:
# This module wasn't present in the index.
found_module = False
continue
# Within a module, at least one of the requested streams must be
# present
streamnames = dep.get_runtime_streams(modname)
found_stream = False
for streamname in streamnames:
stream_list = _get_latest_streams(mod, streamname)
for inner_stream in stream_list:
try:
_get_recursive_dependencies(
local_deps, idx, inner_stream, ignore_missing_deps)
except FileNotFoundError as e:
# Could not find all of this stream's dependencies in
# the repo
continue
found_stream = True
# None of the streams were found for this module
if not found_stream:
found_all_modules = False
# We've iterated through all of the modules; if it's still True, this
# dependency is consistent in the index
if found_all_modules:
found_dep = True
# We were unable to resolve the dependencies for any of the array entries.
# raise FileNotFoundError
if not found_dep and not ignore_missing_deps:
raise FileNotFoundError(
"Could not resolve dependencies for {}".format(
stream.get_NSVCA()))
all_deps.update(local_deps)
def get_default_modules(directory, ignore_missing_deps):
"""
Work through the list of modules and come up with a default set of
modules which would be the minimum to output.
Returns a set of modules
"""
directory = os.path.abspath(directory)
repo_info = _get_repoinfo(directory)
provides = set()
contents = set()
if 'modules' not in repo_info:
return contents
idx = mmd.ModuleIndex()
myfile=repo_info['modules']
if myfile.endswith(".gz"):
openfunc=gzip.GzipFile
elif myfile.endswith(".xz"):
openfunc=lzma.LZMAFile
else:
print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
sys.exit(1)
with openfunc(filename=myfile, mode='r') as gzf:
mmdcts = gzf.read().decode('utf-8')
res, failures = idx.update_from_string(mmdcts, True)
if len(failures) != 0:
raise Exception("YAML FAILURE: FAILURES: %s" % failures)
if not res:
raise Exception("YAML FAILURE: res != True")
all_deps = set()
idx.upgrade_streams(2)
idx = _get_modulemd(directory)
if not idx:
return all_deps
# OK this is cave-man no-sleep programming. I expect there is a
# better way to do this that would be a lot better. However after
# a long long day.. this is what I have.
# First we oo through the default streams and create a set of
# provides that we can check against later.
for modname in idx.get_default_streams():
for modname, streamname in idx.get_default_streams().items():
# Only the latest version of a stream is important, as that is the only one that DNF will consider in its
# transaction logic. We still need to handle each context individually.
mod = idx.get_module(modname)
# Get the default streams and loop through them.
stream_set = mod.get_streams_by_stream_name(
mod.get_defaults().get_default_stream())
stream_set = _get_latest_streams(mod, streamname)
for stream in stream_set:
tempstr = "%s:%s" % (stream.props.module_name,
stream.props.stream_name)
provides.add(tempstr)
# Different contexts have different dependencies
try:
logging.debug("Processing {}".format(stream.get_NSVCA()))
_get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps)
logging.debug("----------")
except FileNotFoundError as e:
# Not all dependencies could be satisfied
print(
"Not all dependencies for {} could be satisfied. {}. Skipping".format(
stream.get_NSVCA(), e))
continue
logging.debug('Default module streams: {}'.format(all_deps))
return all_deps
# Now go through our list and build up a content lists which will
# have only modules which have their dependencies met
tempdict = {}
for modname in idx.get_default_streams():
mod = idx.get_module(modname)
# Get the default streams and loop through them.
# This is a sorted list with the latest in it. We could drop
# looking at later ones here in a future version. (aka lines
# 237 to later)
stream_set = mod.get_streams_by_stream_name(
mod.get_defaults().get_default_stream())
for stream in stream_set:
ourname = stream.get_NSVCA()
tmp_name = "%s:%s" % (stream.props.module_name,
stream.props.stream_name)
# Get dependencies is a list of items. All of the modules
# seem to only have 1 item in them, but we should loop
# over the list anyway.
for deps in stream.get_dependencies():
isprovided = True # a variable to say this can be added.
for mod in deps.get_runtime_modules():
tempstr=""
# It does not seem easy to figure out what the
# platform is so just assume we will meet it.
if mod != 'platform':
for stm in deps.get_runtime_streams(mod):
tempstr = "%s:%s" %(mod,stm)
if tempstr not in provides:
# print( "%s : %s not found." % (ourname,tempstr))
isprovided = False
if isprovided:
if tmp_name in tempdict:
# print("We found %s" % tmp_name)
# Get the stream version we are looking at
ts1=ourname.split(":")[2]
# Get the stream version we stored away
ts2=tempdict[tmp_name].split(":")[2]
# See if we got a newer one. We probably
# don't as it is a sorted list but we
# could have multiple contexts which would
# change things.
if ( int(ts1) > int(ts2) ):
# print ("%s > %s newer for %s", ts1,ts2,ourname)
tempdict[tmp_name] = ourname
else:
# print("We did not find %s" % tmp_name)
tempdict[tmp_name] = ourname
# OK we finally got all our stream names we want to send back to
# our calling function. Read them out and add them to the set.
for indx in tempdict:
contents.add(tempdict[indx])
def _pad_svca(svca, target_length):
"""
If the split() doesn't return all values (e.g. arch is missing), pad it
with `None`
"""
length = len(svca)
svca.extend([None] * (target_length - length))
return svca
return contents
def _dump_modulemd(modname, yaml_file):
idx = _get_modulemd()
assert idx
# Create a new index to hold the information about this particular
# module and stream
new_idx = mmd.ModuleIndex.new()
# Add the module streams
module_name, *svca = modname.split(':')
stream_name, version, context, arch = _pad_svca(svca, 4)
logging.debug("Dumping YAML for {}, {}, {}, {}, {}".format(
module_name, stream_name, version, context, arch))
mod = idx.get_module(module_name)
streams = mod.search_streams(stream_name, int(version), context, arch)
# This should usually be a single item, but we'll be future-compatible
# and account for the possibility of having multiple streams here.
for stream in streams:
new_idx.add_module_stream(stream)
# Add the module defaults
defs = mod.get_defaults()
if defs:
new_idx.add_defaults(defs)
# Write out the file
try:
with open(yaml_file, 'w') as output:
output.write(new_idx.dump_to_string())
except PermissionError as e:
logging.error("Could not write YAML to file: {}".format(e))
raise
def perform_split(repos, args, def_modules):
@@ -304,19 +385,34 @@ def perform_split(repos, args, def_modules):
os.path.join(targetdir, pkgfile),
args.action)
# Extract the modular metadata for this module
if modname != 'non_modular':
_dump_modulemd(modname, os.path.join(targetdir, 'modules.yaml'))
def create_repos(target, repos,def_modules, only_defaults):
def create_repos(target, repos, def_modules, only_defaults):
"""
Routine to create repositories. Input is target directory and a
list of repositories.
Returns None
"""
for modname in repos:
if only_defaults and modname not in def_modules:
continue
targetdir = os.path.join(target, modname)
subprocess.run([
'createrepo_c', os.path.join(target, modname),
'createrepo_c', targetdir,
'--no-database'])
if modname != 'non_modular':
subprocess.run([
'modifyrepo_c',
'--mdtype=modules',
os.path.join(targetdir, 'modules.yaml'),
os.path.join(targetdir, 'repodata')
])
def parse_args():
@@ -326,6 +422,8 @@ def parse_args():
"""
parser = argparse.ArgumentParser(description='Split repositories up')
parser.add_argument('repository', help='The repository to split')
parser.add_argument('--debug', help='Enable debug logging',
action='store_true', default=False)
parser.add_argument('--action', help='Method to create split repos files',
choices=('hardlink', 'symlink', 'copy'),
default='hardlink')
@@ -336,6 +434,11 @@ def parse_args():
action='store_true', default=False)
parser.add_argument('--only-defaults', help='Only output default modules',
action='store_true', default=False)
parser.add_argument('--ignore-missing-default-deps',
help='When using --only-defaults, do not skip '
'default streams whose dependencies cannot be '
'resolved within this repository',
action='store_true', default=False)
return parser.parse_args()
@@ -354,6 +457,7 @@ def setup_target(args):
else:
os.mkdir(args.target)
def parse_repository(directory):
"""
Parse a specific directory, returning a dict with keys module NSVC's and
@@ -372,34 +476,39 @@ def parse_repository(directory):
# everything in a known sack (aka non_modular).
if 'modules' in repo_info:
mod = _parse_repository_modular(repo_info,package_sack)
mod = _parse_repository_modular(repo_info, package_sack)
modpkgset = _get_modular_pkgset(mod)
else:
mod = dict()
modpkgset = set()
non_modular = _parse_repository_non_modular(package_sack,repo_info,
non_modular = _parse_repository_non_modular(package_sack, repo_info,
modpkgset)
mod['non_modular'] = non_modular
## We should probably go through our default modules here and
## remove them from our mod. This would cut down some code paths.
# We should probably go through our default modules here and
# remove them from our mod. This would cut down some code paths.
return mod
def main():
# Determine what the arguments are and
args = parse_args()
if args.debug:
logging.basicConfig(level=logging.DEBUG)
# Go through arguments and act on their values.
setup_target(args)
repos = parse_repository(args.repository)
if args.only_defaults:
def_modules = get_default_modules(args.repository)
def_modules = get_default_modules(args.repository, args.ignore_missing_default_deps)
else:
def_modules = set()
def_modules.add('non_modular')
if not args.skip_missing:
@@ -408,7 +517,8 @@ def main():
if args.target:
perform_split(repos, args, def_modules)
if args.create_repos:
create_repos(args.target, repos,def_modules,args.only_defaults)
create_repos(args.target, repos, def_modules, args.only_defaults)
if __name__ == '__main__':
main()
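
The new _get_modulemd() caches the parsed module index at module level so the repository's modules YAML is only read once. Reduced to a standalone sketch, assuming the libmodulemd 2.x GObject bindings are installed; the modules.yaml.gz path is only a placeholder:

    import gzip
    import gi
    gi.require_version('Modulemd', '2.0')
    from gi.repository import Modulemd as mmd

    _idx = None  # module-level cache, parsed at most once

    def load_module_index(modules_yaml_gz):
        """Parse the repository module metadata once and reuse it afterwards."""
        global _idx
        if _idx:
            return _idx
        with gzip.GzipFile(filename=modules_yaml_gz, mode='r') as gzf:
            yaml_text = gzf.read().decode('utf-8')
        idx = mmd.ModuleIndex.new()
        ok, failures = idx.update_from_string(yaml_text, True)
        if failures or not ok:
            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
        # normalise every stream to modulemd v2, as splitter.py now does
        idx.upgrade_streams(mmd.ModuleStreamVersionEnum.TWO)
        _idx = idx
        return _idx

    if __name__ == '__main__':
        idx = load_module_index('repodata/modules.yaml.gz')  # placeholder path
        print(idx.get_default_streams())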