From 63a8c4a39e7aaa0a1a2f30d845f580ff63c08431 Mon Sep 17 00:00:00 2001
From: Stephen Smoogen
Date: Tue, 31 May 2022 11:30:47 -0400
Subject: [PATCH] move back to the older splitter.py. clean up whitespace
 differences between it and upstream. put upstream in tree for someone to fix
 later.

---
 roles/batcave/files/rhel8-split.cron      |   1 -
 roles/batcave/files/rhel8-split.sh        |  99 ----
 .../files/splitter-upstream-20220531.py   | 532 ++++++++++++++++++
 roles/grobisplitter/files/splitter.py     | 326 ++++-------
 4 files changed, 642 insertions(+), 316 deletions(-)
 delete mode 100644 roles/batcave/files/rhel8-split.cron
 delete mode 100644 roles/batcave/files/rhel8-split.sh
 create mode 100755 roles/grobisplitter/files/splitter-upstream-20220531.py

diff --git a/roles/batcave/files/rhel8-split.cron b/roles/batcave/files/rhel8-split.cron
deleted file mode 100644
index 5b9f3e3367..0000000000
--- a/roles/batcave/files/rhel8-split.cron
+++ /dev/null
@@ -1 +0,0 @@
-03 09 * * * root /usr/local/bin/rhel8-split.sh
diff --git a/roles/batcave/files/rhel8-split.sh b/roles/batcave/files/rhel8-split.sh
deleted file mode 100644
index a42399e730..0000000000
--- a/roles/batcave/files/rhel8-split.sh
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/bin/bash
-HOMEDIR=/mnt/fedora/app/fi-repo/rhel/rhel8
-BINDIR=/usr/local/bin
-
-ARCHES="aarch64 ppc64le s390x x86_64"
-DATE=$(date -Ih | sed 's/+.*//')
-
-DATEDIR=${HOMEDIR}/koji/${DATE}
-
-if [ -d ${DATEDIR} ]; then
-  echo "Directory already exists. Please remove or fix"
-  exit
-else
-mkdir -p ${DATEDIR}
-fi
-
-for ARCH in ${ARCHES}; do
-  # The archdir is where we daily download updates for rhel8
-  ARCHDIR=${HOMEDIR}/${ARCH}
-  if [ ! -d ${ARCHDIR} ]; then
-    echo "Unable to find ${ARCHDIR}"
-    exit
-  fi
-
-  # We consolidate all of the default repositories and remerge them
-  # in a daily tree. This allows us to point koji at a particular
-  # day if we have specific build concerns.
-  OUTDIR=${DATEDIR}/${ARCH}
-  mkdir -p ${OUTDIR}
-  if [ ! -d ${OUTDIR} ]; then
-    echo "Unable to find ${ARCHDIR}"
-    exit
-  else
-    cd ${OUTDIR}
-  fi
-
-  # Begin splitting the various packages into their subtrees
-  ${BINDIR}/splitter.py --action hardlink --target RHEL-8-001 ${ARCHDIR}/rhel-8-for-${ARCH}-baseos-rpms/ --only-defaults &> /dev/null
-  if [ $? -ne 0 ]; then
-    echo "splitter ${ARCH} baseos failed"
-    exit
-  fi
-  ${BINDIR}/splitter.py --action hardlink --target RHEL-8-002 ${ARCHDIR}/rhel-8-for-${ARCH}-appstream-rpms/ --only-defaults &> /dev/null
-  if [ $? -ne 0 ]; then
-    echo "splitter ${ARCH} appstream failed"
-    exit
-  fi
-  ${BINDIR}/splitter.py --action hardlink --target RHEL-8-003 ${ARCHDIR}/codeready-builder-for-rhel-8-${ARCH}-rpms/ &> /dev/null
-  if [ $? -ne 0 ]; then
-    echo "splitter ${ARCH} codeready failed"
-    exit
-  fi
-
-  # Copy the various module trees into RHEL-8-001 where we want them
-  # to work.
-  echo "Moving data to ${ARCH}/RHEL-8-001"
-  cp -anlr RHEL-8-002/* RHEL-8-001
-  cp -anlr RHEL-8-003/* RHEL-8-001
-  # Go into the main tree
-  pushd RHEL-8-001
-
-  # Mergerepo didn't work so lets just createrepo in the top directory.
-  createrepo_c . &> /dev/null
-  popd
-
-  # Cleanup the trash
-  rm -rf RHEL-8-002 RHEL-8-003
-#loop to the next
-done
-
-## Set up the builds so they are pointing to the last working version
-cd ${HOMEDIR}/koji/
-if [[ -e staged ]]; then
-  if [[ -h staged ]]; then
-    rm -f staged
-  else
-    echo "Unable to remove staged. it is not a symbolic link"
-    exit
-  fi
-else
-  echo "No staged link found"
-fi
-
-echo "Linking ${DATE} to staged"
-ln -s ${DATE} staged
-
-
-for ARCH in ${ARCHES}; do
-  pushd latest/
-  mkdir -p ${ARCH}
-  dnf --disablerepo=\* --enablerepo=RHEL-8-001 --repofrompath=RHEL-8-001,https://infrastructure.fedoraproject.org/repo/rhel/rhel8/koji/staged/${ARCH}/RHEL-8-001/ reposync -a ${ARCH} -a noarch -p ${ARCH} --newest --delete &> /dev/null
-  if [[ $? -eq 0 ]]; then
-    cd ${ARCH}/RHEL-8-001
-    createrepo_c . &> /dev/null
-  else
-    echo "Unable to run createrepo on latest/${ARCH}"
-  fi
-  popd
-done
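For orientation, a rough sketch of the daily tree the deleted script maintained (paths follow the HOMEDIR, DATE, and ARCH variables above; the date and architecture shown are examples):

    /mnt/fedora/app/fi-repo/rhel/rhel8/koji/2022-05-31/x86_64/RHEL-8-001/  # merged baseos + appstream + codeready split
    /mnt/fedora/app/fi-repo/rhel/rhel8/koji/staged -> 2022-05-31           # symlink pointing at the last good day's tree
    /mnt/fedora/app/fi-repo/rhel/rhel8/koji/latest/x86_64/RHEL-8-001/      # reposync of the newest staged packages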
diff --git a/roles/grobisplitter/files/splitter-upstream-20220531.py b/roles/grobisplitter/files/splitter-upstream-20220531.py
new file mode 100755
index 0000000000..1d0123f792
--- /dev/null
+++ b/roles/grobisplitter/files/splitter-upstream-20220531.py
@@ -0,0 +1,532 @@
+#!/bin/python3
+
+# Import libraries needed for application to work
+
+import argparse
+import shutil
+import gi
+import gzip
+import librepo
+import lzma
+import hawkey
+import tempfile
+import os
+import subprocess
+import sys
+import time
+import logging
+
+# Look for a specific version of modulemd. The 1.x series does not
+# have the tools we need.
+try:
+    gi.require_version('Modulemd', '2.0')
+    from gi.repository import Modulemd as mmd
+except ValueError:
+    print("libmodulemd 2.0 is not installed..")
+    sys.exit(1)
+
+# We only want to load the module metadata once. It can be reused as often as required
+_idx = None
+
+def _get_latest_streams(mymod, stream):
+    """
+    Routine takes modulemd object and a stream name.
+    Finds the latest stream from that and returns that as a stream
+    object.
+    """
+    all_streams = mymod.search_streams(stream, 0)
+    latest_streams = mymod.search_streams(stream,
+                                          all_streams[0].props.version)
+
+    return latest_streams
+
+
+def _get_repoinfo(directory):
+    """
+    A function which goes into the given directory and sets up the
+    needed data for the repository using librepo.
+    Returns the LRR_YUM_REPO
+    """
+    with tempfile.TemporaryDirectory(prefix='elsplit_librepo_') as lrodir:
+        h = librepo.Handle()
+        h.setopt(librepo.LRO_URLS, ["file://%s" % directory])
+        h.setopt(librepo.LRO_REPOTYPE, librepo.LR_YUMREPO)
+        h.setopt(librepo.LRO_DESTDIR, lrodir)
+        h.setopt(librepo.LRO_LOCAL, True)
+        h.setopt(librepo.LRO_IGNOREMISSING, False)
+        r = h.perform()
+        return r.getinfo(librepo.LRR_YUM_REPO)
+
+
+def _get_modulemd(directory=None, repo_info=None):
+    """
+    Retrieve the module metadata from this repository.
+    :param directory: The path to the repository. Must contain repodata/repomd.xml and modules.yaml.
+    :param repo_info: An already-acquired repo_info structure
+    :return: A Modulemd.ModulemdIndex object containing the module metadata from this repository.
+    """
+
+    # Return the cached value
+    global _idx
+    if _idx:
+        return _idx
+
+    # If we don't have a cached value, we need either directory or repo_info
+    assert directory or repo_info
+
+    if directory:
+        directory = os.path.abspath(directory)
+        repo_info = _get_repoinfo(directory)
+
+    if 'modules' not in repo_info:
+        return None
+
+    _idx = mmd.ModuleIndex.new()
+
+    myfile=repo_info['modules']
+    if myfile.endswith(".gz"):
+        openfunc=gzip.GzipFile
+    elif myfile.endswith(".xz"):
+        openfunc=lzma.LZMAFile
+    else:
+        print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
+        sys.exit(1)
+    with openfunc(filename=myfile, mode='r') as zipf:
+        mmdcts = zipf.read().decode('utf-8')
+        res, failures = _idx.update_from_string(mmdcts, True)
+        if len(failures) != 0:
+            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
+        if not res:
+            raise Exception("YAML FAILURE: res != True")
+
+    # Ensure that every stream in the index is using v2
+    _idx.upgrade_streams(mmd.ModuleStreamVersionEnum.TWO)
+
+    return _idx
+
+
+def _get_hawkey_sack(repo_info):
+    """
+    A function to pull in the repository sack from hawkey.
+    Returns the sack.
+    """
+    hk_repo = hawkey.Repo("")
+    hk_repo.filelists_fn = repo_info["filelists"]
+    hk_repo.primary_fn = repo_info["primary"]
+    hk_repo.repomd_fn = repo_info["repomd"]
+
+    primary_sack = hawkey.Sack()
+    primary_sack.load_repo(hk_repo, build_cache=False)
+
+    return primary_sack
+
+
+def _get_filelist(package_sack):
+    """
+    Determine the file locations of all packages in the sack. Use the
+    package-name-epoch-version-release-arch as the key.
+    Returns a dictionary.
+    """
+    pkg_list = {}
+    for pkg in hawkey.Query(package_sack):
+        nevr = "%s-%s:%s-%s.%s" % (pkg.name, pkg.epoch,
+                                   pkg.version, pkg.release, pkg.arch)
+        pkg_list[nevr] = pkg.location
+    return pkg_list
+
+
+def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
+    """
+    Simple routine to go through a repo, and figure out which packages
+    are not in any module. Add the file locations for those packages
+    so we can link to them.
+    Returns a set of file locations.
+    """
+    sack = package_sack
+    pkgs = set()
+
+    for pkg in hawkey.Query(sack):
+        if pkg.location in modpkgset:
+            continue
+        pkgs.add(pkg.location)
+    return pkgs
+
+
+def _parse_repository_modular(repo_info, package_sack):
+    """
+    Returns a dictionary of packages indexed by the modules they are
+    contained in.
+    """
+    cts = {}
+    idx = _get_modulemd(repo_info=repo_info)
+
+    pkgs_list = _get_filelist(package_sack)
+    idx.upgrade_streams(2)
+    for modname in idx.get_module_names():
+        mod = idx.get_module(modname)
+        for stream in mod.get_all_streams():
+            templ = list()
+            for pkg in stream.get_rpm_artifacts():
+                if pkg in pkgs_list:
+                    templ.append(pkgs_list[pkg])
+                else:
+                    continue
+            cts[stream.get_NSVCA()] = templ
+
+    return cts
+
+
+def _get_modular_pkgset(mod):
+    """
+    Takes a module and goes through the moduleset to determine which
+    packages are inside it.
+    Returns a list of packages
+    """
+    pkgs = set()
+
+    for modcts in mod.values():
+        for pkg in modcts:
+            pkgs.add(pkg)
+
+    return list(pkgs)
+
+
+def _perform_action(src, dst, action):
+    """
+    Performs either a copy, hardlink or symlink of the file src to the
+    file destination.
+    Returns None
+    """
+    if action == 'copy':
+        try:
+            shutil.copy(src, dst)
+        except FileNotFoundError:
+            # Missing files are acceptable: they're already checked before
+            # this by validate_filenames.
+            pass
+    elif action == 'hardlink':
+        os.link(src, dst)
+    elif action == 'symlink':
+        os.symlink(src, dst)
+
+
+def validate_filenames(directory, repoinfo):
+    """
+    Take a directory and repository information. Test each file in
+    repository to exist in said module. This stops us when dealing
+    with broken repositories or missing modules.
+    Returns True if no problems found. False otherwise.
+    """
+    isok = True
+    for modname in repoinfo:
+        for pkg in repoinfo[modname]:
+            if not os.path.exists(os.path.join(directory, pkg)):
+                isok = False
+                print("Path %s from mod %s did not exist" % (pkg, modname))
+    return isok
+
+
+def _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps):
+    if stream.get_NSVCA() in all_deps:
+        # We've already encountered this NSVCA, so don't go through it again
+        logging.debug('Already included {}'.format(stream.get_NSVCA()))
+        return
+
+    # Store this NSVCA/NS pair
+    local_deps = all_deps
+    local_deps.add(stream.get_NSVCA())
+
+    logging.debug("Recursive deps: {}".format(stream.get_NSVCA()))
+
+    # Loop through the dependencies for this stream
+    deps = stream.get_dependencies()
+
+    # At least one of the dependency array entries must exist in the repo
+    found_dep = False
+    for dep in deps:
+        # Within an array entry, all of the modules must be present in the
+        # index
+        found_all_modules = True
+        for modname in dep.get_runtime_modules():
+            # Ignore "platform" because it's special
+            if modname == "platform":
+                logging.debug('Skipping platform')
+                continue
+            logging.debug('Processing dependency on module {}'.format(modname))
+
+            mod = idx.get_module(modname)
+            if not mod:
+                # This module wasn't present in the index.
+                found_module = False
+                continue
+
+            # Within a module, at least one of the requested streams must be
+            # present
+            streamnames = dep.get_runtime_streams(modname)
+            found_stream = False
+            for streamname in streamnames:
+                stream_list = _get_latest_streams(mod, streamname)
+                for inner_stream in stream_list:
+                    try:
+                        _get_recursive_dependencies(
+                            local_deps, idx, inner_stream, ignore_missing_deps)
+                    except FileNotFoundError as e:
+                        # Could not find all of this stream's dependencies in
+                        # the repo
+                        continue
+                    found_stream = True
+
+            # None of the streams were found for this module
+            if not found_stream:
+                found_all_modules = False
+
+        # We've iterated through all of the modules; if it's still True, this
+        # dependency is consistent in the index
+        if found_all_modules:
+            found_dep = True
+
+    # We were unable to resolve the dependencies for any of the array entries.
+    # raise FileNotFoundError
+    if not found_dep and not ignore_missing_deps:
+        raise FileNotFoundError(
+            "Could not resolve dependencies for {}".format(
+                stream.get_NSVCA()))
+
+    all_deps.update(local_deps)
+
+
+def get_default_modules(directory, ignore_missing_deps):
+    """
+    Work through the list of modules and come up with a default set of
+    modules which would be the minimum to output.
+    Returns a set of modules
+    """
+
+    all_deps = set()
+
+    idx = _get_modulemd(directory)
+    if not idx:
+        return all_deps
+
+    for modname, streamname in idx.get_default_streams().items():
+        # Only the latest version of a stream is important, as that is the only one that DNF will consider in its
+        # transaction logic. We still need to handle each context individually.
+        mod = idx.get_module(modname)
+        stream_set = _get_latest_streams(mod, streamname)
+        for stream in stream_set:
+            # Different contexts have different dependencies
+            try:
+                logging.debug("Processing {}".format(stream.get_NSVCA()))
+                _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps)
+                logging.debug("----------")
+            except FileNotFoundError as e:
+                # Not all dependencies could be satisfied
+                print(
+                    "Not all dependencies for {} could be satisfied. {}. Skipping".format(
+                        stream.get_NSVCA(), e))
+                continue
+
+    logging.debug('Default module streams: {}'.format(all_deps))
+
+    return all_deps
+
+
+def _pad_svca(svca, target_length):
+    """
+    If the split() doesn't return all values (e.g. arch is missing), pad it
+    with `None`
+    """
+    length = len(svca)
+    svca.extend([None] * (target_length - length))
+    return svca
+
+
+def _dump_modulemd(modname, yaml_file):
+    idx = _get_modulemd()
+    assert idx
+
+    # Create a new index to hold the information about this particular
+    # module and stream
+    new_idx = mmd.ModuleIndex.new()
+
+    # Add the module streams
+    module_name, *svca = modname.split(':')
+    stream_name, version, context, arch = _pad_svca(svca, 4)
+
+    logging.debug("Dumping YAML for {}, {}, {}, {}, {}".format(
+        module_name, stream_name, version, context, arch))
+
+    mod = idx.get_module(module_name)
+    streams = mod.search_streams(stream_name, int(version), context, arch)
+
+    # This should usually be a single item, but we'll be future-compatible
+    # and account for the possibility of having multiple streams here.
+    for stream in streams:
+        new_idx.add_module_stream(stream)
+
+    # Add the module defaults
+    defs = mod.get_defaults()
+    if defs:
+        new_idx.add_defaults(defs)
+
+    # Write out the file
+    try:
+        with open(yaml_file, 'w') as output:
+            output.write(new_idx.dump_to_string())
+    except PermissionError as e:
+        logging.error("Could not write YAML to file: {}".format(e))
+        raise
+
+
+def perform_split(repos, args, def_modules):
+    for modname in repos:
+        if args.only_defaults and modname not in def_modules:
+            continue
+
+        targetdir = os.path.join(args.target, modname)
+        os.mkdir(targetdir)
+
+        for pkg in repos[modname]:
+            _, pkgfile = os.path.split(pkg)
+            _perform_action(
+                os.path.join(args.repository, pkg),
+                os.path.join(targetdir, pkgfile),
+                args.action)
+
+        # Extract the modular metadata for this module
+        if modname != 'non_modular':
+            _dump_modulemd(modname, os.path.join(targetdir, 'modules.yaml'))
+
+
+def create_repos(target, repos, def_modules, only_defaults):
+    """
+    Routine to create repositories. Input is target directory and a
+    list of repositories.
+    Returns None
+    """
+
+    for modname in repos:
+        if only_defaults and modname not in def_modules:
+            continue
+
+        targetdir = os.path.join(target, modname)
+
+        subprocess.run([
+            'createrepo_c', targetdir,
+            '--no-database'])
+        if modname != 'non_modular':
+            subprocess.run([
+                'modifyrepo_c',
+                '--mdtype=modules',
+                os.path.join(targetdir, 'modules.yaml'),
+                os.path.join(targetdir, 'repodata')
+            ])
+
+
+def parse_args():
+    """
+    A standard argument parser routine which pulls in values from the
+    command line and returns a parsed argument dictionary.
+    """
+    parser = argparse.ArgumentParser(description='Split repositories up')
+    parser.add_argument('repository', help='The repository to split')
+    parser.add_argument('--debug', help='Enable debug logging',
+                        action='store_true', default=False)
+    parser.add_argument('--action', help='Method to create split repos files',
+                        choices=('hardlink', 'symlink', 'copy'),
+                        default='hardlink')
+    parser.add_argument('--target', help='Target directory for split repos')
+    parser.add_argument('--skip-missing', help='Skip missing packages',
+                        action='store_true', default=False)
+    parser.add_argument('--create-repos', help='Create repository metadatas',
+                        action='store_true', default=False)
+    parser.add_argument('--only-defaults', help='Only output default modules',
+                        action='store_true', default=False)
+    parser.add_argument('--ignore-missing-default-deps',
+                        help='When using --only-defaults, do not skip '
+                             'default streams whose dependencies cannot be '
+                             'resolved within this repository',
+                        action='store_true', default=False)
+    return parser.parse_args()
+
+
+def setup_target(args):
+    """
+    Checks that the target directory exists and is empty. If not it
+    exits the program. Returns nothing.
+    """
+    if args.target:
+        args.target = os.path.abspath(args.target)
+        if os.path.exists(args.target):
+            if not os.path.isdir(args.target):
+                raise ValueError("Target must be a directory")
+            elif len(os.listdir(args.target)) != 0:
+                raise ValueError("Target must be empty")
+        else:
+            os.mkdir(args.target)
+
+
+def parse_repository(directory):
+    """
+    Parse a specific directory, returning a dict with keys module NSVC's and
+    values a list of package NVRs.
+    The dict will also have a key "non_modular" for the non-modular packages.
+    """
+    directory = os.path.abspath(directory)
+    repo_info = _get_repoinfo(directory)
+
+    # Get the package sack and get a filelist of all packages.
+    package_sack = _get_hawkey_sack(repo_info)
+    _get_filelist(package_sack)
+
+    # If we have a repository with no modules we do not want our
+    # script to error out but just remake the repository with
+    # everything in a known sack (aka non_modular).
+
+    if 'modules' in repo_info:
+        mod = _parse_repository_modular(repo_info, package_sack)
+        modpkgset = _get_modular_pkgset(mod)
+    else:
+        mod = dict()
+        modpkgset = set()
+
+    non_modular = _parse_repository_non_modular(package_sack, repo_info,
+                                                modpkgset)
+    mod['non_modular'] = non_modular
+
+    # We should probably go through our default modules here and
+    # remove them from our mod. This would cut down some code paths.
+
+    return mod
+
+
+def main():
+    # Determine what the arguments are and
+    args = parse_args()
+
+    if args.debug:
+        logging.basicConfig(level=logging.DEBUG)
+
+    # Go through arguments and act on their values.
+    setup_target(args)
+
+    repos = parse_repository(args.repository)
+
+    if args.only_defaults:
+        def_modules = get_default_modules(args.repository, args.ignore_missing_default_deps)
+    else:
+        def_modules = set()
+
+    def_modules.add('non_modular')
+
+    if not args.skip_missing:
+        if not validate_filenames(args.repository, repos):
+            raise ValueError("Package files were missing!")
+    if args.target:
+        perform_split(repos, args, def_modules)
+    if args.create_repos:
+        create_repos(args.target, repos, def_modules, args.only_defaults)
+
+
+if __name__ == '__main__':
+    main()
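For reference, a plausible invocation of this upstream version, using the flags defined in its parse_args() above (the repository path here is illustrative):

    ./splitter-upstream-20220531.py --action hardlink --target RHEL-8-001 \
        --only-defaults --create-repos --debug /srv/rhel8/rhel-8-for-x86_64-baseos-rpms/

Relative to the older script restored below, this version adds the --debug and --ignore-missing-default-deps switches, resolves default-stream dependencies recursively through _get_recursive_dependencies, and writes per-module modules.yaml metadata during the split.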
diff --git a/roles/grobisplitter/files/splitter.py b/roles/grobisplitter/files/splitter.py
index 1d0123f792..9b40d0e5d2 100755
--- a/roles/grobisplitter/files/splitter.py
+++ b/roles/grobisplitter/files/splitter.py
@@ -25,10 +25,9 @@ except ValueError:
     print("libmodulemd 2.0 is not installed..")
     sys.exit(1)
 
-# We only want to load the module metadata once. It can be reused as often as required
-_idx = None
-
-def _get_latest_streams(mymod, stream):
+# This code is from Stephen Gallagher to make my other caveman code
+# less icky.
+def _get_latest_streams (mymod, stream):
     """
     Routine takes modulemd object and a stream name.
     Finds the latest stream from that and returns that as a stream
@@ -57,54 +56,6 @@ def _get_repoinfo(directory):
         r = h.perform()
         return r.getinfo(librepo.LRR_YUM_REPO)
 
-
-def _get_modulemd(directory=None, repo_info=None):
-    """
-    Retrieve the module metadata from this repository.
-    :param directory: The path to the repository. Must contain repodata/repomd.xml and modules.yaml.
-    :param repo_info: An already-acquired repo_info structure
-    :return: A Modulemd.ModulemdIndex object containing the module metadata from this repository.
-    """
-
-    # Return the cached value
-    global _idx
-    if _idx:
-        return _idx
-
-    # If we don't have a cached value, we need either directory or repo_info
-    assert directory or repo_info
-
-    if directory:
-        directory = os.path.abspath(directory)
-        repo_info = _get_repoinfo(directory)
-
-    if 'modules' not in repo_info:
-        return None
-
-    _idx = mmd.ModuleIndex.new()
-
-    myfile=repo_info['modules']
-    if myfile.endswith(".gz"):
-        openfunc=gzip.GzipFile
-    elif myfile.endswith(".xz"):
-        openfunc=lzma.LZMAFile
-    else:
-        print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
-        sys.exit(1)
-    with openfunc(filename=myfile, mode='r') as zipf:
-        mmdcts = zipf.read().decode('utf-8')
-        res, failures = _idx.update_from_string(mmdcts, True)
-        if len(failures) != 0:
-            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
-        if not res:
-            raise Exception("YAML FAILURE: res != True")
-
-    # Ensure that every stream in the index is using v2
-    _idx.upgrade_streams(mmd.ModuleStreamVersionEnum.TWO)
-
-    return _idx
-
-
 def _get_hawkey_sack(repo_info):
     """
     A function to pull in the repository sack from hawkey.
@@ -158,7 +109,22 @@ def _parse_repository_modular(repo_info, package_sack):
     contained in.
     """
     cts = {}
-    idx = _get_modulemd(repo_info=repo_info)
+    idx = mmd.ModuleIndex()
+    myfile = repo_info['modules']
+    if myfile.endswith(".gz"):
+        openfunc=gzip.GzipFile
+    elif myfile.endswith(".xz"):
+        openfunc=lzma.LZMAFile
+    else:
+        print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
+        sys.exit(1)
+    with openfunc(filename=myfile, mode='r') as gzf:
+        mmdcts = gzf.read().decode('utf-8')
+        res, failures = idx.update_from_string(mmdcts, True)
+        if len(failures) != 0:
+            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
+        if not res:
+            raise Exception("YAML FAILURE: res != True")
 
     pkgs_list = _get_filelist(package_sack)
     idx.upgrade_streams(2)
@@ -226,156 +192,108 @@ def validate_filenames(directory, repoinfo):
     return isok
 
 
-def _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps):
-    if stream.get_NSVCA() in all_deps:
-        # We've already encountered this NSVCA, so don't go through it again
-        logging.debug('Already included {}'.format(stream.get_NSVCA()))
-        return
-
-    # Store this NSVCA/NS pair
-    local_deps = all_deps
-    local_deps.add(stream.get_NSVCA())
-
-    logging.debug("Recursive deps: {}".format(stream.get_NSVCA()))
-
-    # Loop through the dependencies for this stream
-    deps = stream.get_dependencies()
-
-    # At least one of the dependency array entries must exist in the repo
-    found_dep = False
-    for dep in deps:
-        # Within an array entry, all of the modules must be present in the
-        # index
-        found_all_modules = True
-        for modname in dep.get_runtime_modules():
-            # Ignore "platform" because it's special
-            if modname == "platform":
-                logging.debug('Skipping platform')
-                continue
-            logging.debug('Processing dependency on module {}'.format(modname))
-
-            mod = idx.get_module(modname)
-            if not mod:
-                # This module wasn't present in the index.
-                found_module = False
-                continue
-
-            # Within a module, at least one of the requested streams must be
-            # present
-            streamnames = dep.get_runtime_streams(modname)
-            found_stream = False
-            for streamname in streamnames:
-                stream_list = _get_latest_streams(mod, streamname)
-                for inner_stream in stream_list:
-                    try:
-                        _get_recursive_dependencies(
-                            local_deps, idx, inner_stream, ignore_missing_deps)
-                    except FileNotFoundError as e:
-                        # Could not find all of this stream's dependencies in
-                        # the repo
-                        continue
-                    found_stream = True
-
-            # None of the streams were found for this module
-            if not found_stream:
-                found_all_modules = False
-
-        # We've iterated through all of the modules; if it's still True, this
-        # dependency is consistent in the index
-        if found_all_modules:
-            found_dep = True
-
-    # We were unable to resolve the dependencies for any of the array entries.
-    # raise FileNotFoundError
-    if not found_dep and not ignore_missing_deps:
-        raise FileNotFoundError(
-            "Could not resolve dependencies for {}".format(
-                stream.get_NSVCA()))
-
-    all_deps.update(local_deps)
-
-
-def get_default_modules(directory, ignore_missing_deps):
+def get_default_modules(directory):
     """
     Work through the list of modules and come up with a default set of
     modules which would be the minimum to output.
     Returns a set of modules
     """
+    directory = os.path.abspath(directory)
+    repo_info = _get_repoinfo(directory)
 
-    all_deps = set()
+    provides = set()
+    contents = set()
+    if 'modules' not in repo_info:
+        return contents
+    idx = mmd.ModuleIndex()
+    myfile=repo_info['modules']
+    if myfile.endswith(".gz"):
+        openfunc=gzip.GzipFile
+    elif myfile.endswith(".xz"):
+        openfunc=lzma.LZMAFile
+    else:
+        print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
+        sys.exit(1)
+    with openfunc(filename=myfile, mode='r') as gzf:
+        mmdcts = gzf.read().decode('utf-8')
+        res, failures = idx.update_from_string(mmdcts, True)
+        if len(failures) != 0:
+            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
+        if not res:
+            raise Exception("YAML FAILURE: res != True")
 
-    idx = _get_modulemd(directory)
-    if not idx:
-        return all_deps
+    idx.upgrade_streams(2)
 
-    for modname, streamname in idx.get_default_streams().items():
-        # Only the latest version of a stream is important, as that is the only one that DNF will consider in its
-        # transaction logic. We still need to handle each context individually.
+    # OK this is cave-man no-sleep programming. I expect there is a
+    # better way to do this that would be a lot better. However after
+    # a long long day.. this is what I have.
+
+    # First we go through the default streams and create a set of
+    # provides that we can check against later.
+    for modname in idx.get_default_streams():
         mod = idx.get_module(modname)
-        stream_set = _get_latest_streams(mod, streamname)
+        # Get the default streams and loop through them.
+        stream_set = mod.get_streams_by_stream_name(
+            mod.get_defaults().get_default_stream())
         for stream in stream_set:
-            # Different contexts have different dependencies
-            try:
-                logging.debug("Processing {}".format(stream.get_NSVCA()))
-                _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps)
-                logging.debug("----------")
-            except FileNotFoundError as e:
-                # Not all dependencies could be satisfied
-                print(
-                    "Not all dependencies for {} could be satisfied. {}. Skipping".format(
-                        stream.get_NSVCA(), e))
-                continue
-
-    logging.debug('Default module streams: {}'.format(all_deps))
-
-    return all_deps
+            tempstr = "%s:%s" % (stream.props.module_name,
+                                 stream.props.stream_name)
+            provides.add(tempstr)
 
-
-def _pad_svca(svca, target_length):
-    """
-    If the split() doesn't return all values (e.g. arch is missing), pad it
-    with `None`
-    """
-    length = len(svca)
-    svca.extend([None] * (target_length - length))
-    return svca
+    # Now go through our list and build up a content lists which will
+    # have only modules which have their dependencies met
+    tempdict = {}
+    for modname in idx.get_default_streams():
+        mod = idx.get_module(modname)
+        # Get the default streams and loop through them.
+        # This is a sorted list with the latest in it. We could drop
+        # looking at later ones here in a future version. (aka lines
+        # 237 to later)
+        stream_set = mod.get_streams_by_stream_name(
+            mod.get_defaults().get_default_stream())
+        for stream in stream_set:
+            ourname = stream.get_NSVCA()
+            tmp_name = "%s:%s" % (stream.props.module_name,
+                                  stream.props.stream_name)
+            # Get dependencies is a list of items. All of the modules
+            # seem to only have 1 item in them, but we should loop
+            # over the list anyway.
+            for deps in stream.get_dependencies():
+                isprovided = True  # a variable to say this can be added.
+                for mod in deps.get_runtime_modules():
+                    tempstr=""
+                    # It does not seem easy to figure out what the
+                    # platform is so just assume we will meet it.
+                    if mod != 'platform':
+                        for stm in deps.get_runtime_streams(mod):
+                            tempstr = "%s:%s" %(mod,stm)
+                        if tempstr not in provides:
+                            # print( "%s : %s not found." % (ourname,tempstr))
+                            isprovided = False
+                if isprovided:
+                    if tmp_name in tempdict:
+                        # print("We found %s" % tmp_name)
+                        # Get the stream version we are looking at
+                        ts1=ourname.split(":")[2]
+                        # Get the stream version we stored away
+                        ts2=tempdict[tmp_name].split(":")[2]
+                        # See if we got a newer one. We probably
+                        # don't as it is a sorted list but we
+                        # could have multiple contexts which would
+                        # change things.
+                        if ( int(ts1) > int(ts2) ):
+                            # print ("%s > %s newer for %s", ts1,ts2,ourname)
+                            tempdict[tmp_name] = ourname
+                    else:
+                        # print("We did not find %s" % tmp_name)
+                        tempdict[tmp_name] = ourname
+    # OK we finally got all our stream names we want to send back to
+    # our calling function. Read them out and add them to the set.
+    for indx in tempdict:
+        contents.add(tempdict[indx])
 
-
-def _dump_modulemd(modname, yaml_file):
-    idx = _get_modulemd()
-    assert idx
-
-    # Create a new index to hold the information about this particular
-    # module and stream
-    new_idx = mmd.ModuleIndex.new()
-
-    # Add the module streams
-    module_name, *svca = modname.split(':')
-    stream_name, version, context, arch = _pad_svca(svca, 4)
-
-    logging.debug("Dumping YAML for {}, {}, {}, {}, {}".format(
-        module_name, stream_name, version, context, arch))
-
-    mod = idx.get_module(module_name)
-    streams = mod.search_streams(stream_name, int(version), context, arch)
-
-    # This should usually be a single item, but we'll be future-compatible
-    # and account for the possibility of having multiple streams here.
-    for stream in streams:
-        new_idx.add_module_stream(stream)
-
-    # Add the module defaults
-    defs = mod.get_defaults()
-    if defs:
-        new_idx.add_defaults(defs)
-
-    # Write out the file
-    try:
-        with open(yaml_file, 'w') as output:
-            output.write(new_idx.dump_to_string())
-    except PermissionError as e:
-        logging.error("Could not write YAML to file: {}".format(e))
-        raise
+    return contents
 
 
 def perform_split(repos, args, def_modules):
@@ -393,10 +311,6 @@
                 os.path.join(targetdir, pkgfile),
                 args.action)
 
-        # Extract the modular metadata for this module
-        if modname != 'non_modular':
-            _dump_modulemd(modname, os.path.join(targetdir, 'modules.yaml'))
-
 
 def create_repos(target, repos, def_modules, only_defaults):
     """
@@ -408,19 +322,9 @@
     for modname in repos:
         if only_defaults and modname not in def_modules:
             continue
-
-        targetdir = os.path.join(target, modname)
-
         subprocess.run([
-            'createrepo_c', targetdir,
+            'createrepo_c', os.path.join(target, modname),
             '--no-database'])
-        if modname != 'non_modular':
-            subprocess.run([
-                'modifyrepo_c',
-                '--mdtype=modules',
-                os.path.join(targetdir, 'modules.yaml'),
-                os.path.join(targetdir, 'repodata')
-            ])
 
 
 def parse_args():
@@ -430,8 +334,6 @@
     """
     parser = argparse.ArgumentParser(description='Split repositories up')
     parser.add_argument('repository', help='The repository to split')
-    parser.add_argument('--debug', help='Enable debug logging',
-                        action='store_true', default=False)
     parser.add_argument('--action', help='Method to create split repos files',
                         choices=('hardlink', 'symlink', 'copy'),
                         default='hardlink')
@@ -442,11 +344,6 @@
                         action='store_true', default=False)
     parser.add_argument('--only-defaults', help='Only output default modules',
                         action='store_true', default=False)
-    parser.add_argument('--ignore-missing-default-deps',
-                        help='When using --only-defaults, do not skip '
-                             'default streams whose dependencies cannot be '
-                             'resolved within this repository',
-                        action='store_true', default=False)
     return parser.parse_args()
 
 
@@ -504,16 +401,13 @@
 def main():
     # Determine what the arguments are and
     args = parse_args()
 
-    if args.debug:
-        logging.basicConfig(level=logging.DEBUG)
-
     # Go through arguments and act on their values.
     setup_target(args)
 
     repos = parse_repository(args.repository)
 
     if args.only_defaults:
-        def_modules = get_default_modules(args.repository, args.ignore_missing_default_deps)
+        def_modules = get_default_modules(args.repository)
     else:
         def_modules = set()
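With the revert applied, splitter.py again matches the interface the removed rhel8-split.sh called; a representative invocation (repository path again illustrative) is:

    ./splitter.py --action hardlink --target RHEL-8-001 /srv/rhel8/rhel-8-for-x86_64-baseos-rpms/ --only-defaults

The reverted code drops --debug and --ignore-missing-default-deps, replaces the recursive dependency walk with a simpler provides-set comparison of default streams, and no longer emits modules.yaml or runs modifyrepo_c on the split trees.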