From 63a8c4a39e7aaa0a1a2f30d845f580ff63c08431 Mon Sep 17 00:00:00 2001
From: Stephen Smoogen
Date: Tue, 31 May 2022 11:30:47 -0400
Subject: [PATCH] move back to the older splitter.py. clean up whitespace
 differences between it and upstream. put upstream in tree for someone to fix
 later.

---
 roles/batcave/files/rhel8-split.cron      |   1 -
 roles/batcave/files/rhel8-split.sh        |  99 ----
 .../files/splitter-upstream-20220531.py   | 532 ++++++++++++++++++
 roles/grobisplitter/files/splitter.py     | 326 ++++-------
 4 files changed, 642 insertions(+), 316 deletions(-)
 delete mode 100644 roles/batcave/files/rhel8-split.cron
 delete mode 100644 roles/batcave/files/rhel8-split.sh
 create mode 100755 roles/grobisplitter/files/splitter-upstream-20220531.py

diff --git a/roles/batcave/files/rhel8-split.cron b/roles/batcave/files/rhel8-split.cron
deleted file mode 100644
index 5b9f3e3367..0000000000
--- a/roles/batcave/files/rhel8-split.cron
+++ /dev/null
@@ -1 +0,0 @@
-03 09 * * * root /usr/local/bin/rhel8-split.sh
diff --git a/roles/batcave/files/rhel8-split.sh b/roles/batcave/files/rhel8-split.sh
deleted file mode 100644
index a42399e730..0000000000
--- a/roles/batcave/files/rhel8-split.sh
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/bin/bash
-HOMEDIR=/mnt/fedora/app/fi-repo/rhel/rhel8
-BINDIR=/usr/local/bin
-
-ARCHES="aarch64 ppc64le s390x x86_64"
-DATE=$(date -Ih | sed 's/+.*//')
-
-DATEDIR=${HOMEDIR}/koji/${DATE}
-
-if [ -d ${DATEDIR} ]; then
-  echo "Directory already exists. Please remove or fix"
-  exit
-else
-mkdir -p ${DATEDIR}
-fi
-
-for ARCH in ${ARCHES}; do
-  # The archdir is where we daily download updates for rhel8
-  ARCHDIR=${HOMEDIR}/${ARCH}
-  if [ ! -d ${ARCHDIR} ]; then
-    echo "Unable to find ${ARCHDIR}"
-    exit
-  fi
-
-  # We consolidate all of the default repositories and remerge them
-  # in a daily tree. This allows us to point koji at a particular
-  # day if we have specific build concerns.
-  OUTDIR=${DATEDIR}/${ARCH}
-  mkdir -p ${OUTDIR}
-  if [ ! -d ${OUTDIR} ]; then
-    echo "Unable to find ${ARCHDIR}"
-    exit
-  else
-    cd ${OUTDIR}
-  fi
-
-  # Begin splitting the various packages into their subtrees
-  ${BINDIR}/splitter.py --action hardlink --target RHEL-8-001 ${ARCHDIR}/rhel-8-for-${ARCH}-baseos-rpms/ --only-defaults &> /dev/null
-  if [ $? -ne 0 ]; then
-    echo "splitter ${ARCH} baseos failed"
-    exit
-  fi
-  ${BINDIR}/splitter.py --action hardlink --target RHEL-8-002 ${ARCHDIR}/rhel-8-for-${ARCH}-appstream-rpms/ --only-defaults &> /dev/null
-  if [ $? -ne 0 ]; then
-    echo "splitter ${ARCH} appstream failed"
-    exit
-  fi
-  ${BINDIR}/splitter.py --action hardlink --target RHEL-8-003 ${ARCHDIR}/codeready-builder-for-rhel-8-${ARCH}-rpms/ &> /dev/null
-  if [ $? -ne 0 ]; then
-    echo "splitter ${ARCH} codeready failed"
-    exit
-  fi
-
-  # Copy the various module trees into RHEL-8-001 where we want them
-  # to work.
-  echo "Moving data to ${ARCH}/RHEL-8-001"
-  cp -anlr RHEL-8-002/* RHEL-8-001
-  cp -anlr RHEL-8-003/* RHEL-8-001
-  # Go into the main tree
-  pushd RHEL-8-001
-
-  # Mergerepo didn't work so lets just createrepo in the top directory.
-  createrepo_c . &> /dev/null
-  popd
-
-  # Cleanup the trash
-  rm -rf RHEL-8-002 RHEL-8-003
-#loop to the next
-done
-
-## Set up the builds so they are pointing to the last working version
-cd ${HOMEDIR}/koji/
-if [[ -e staged ]]; then
-  if [[ -h staged ]]; then
-    rm -f staged
-  else
-    echo "Unable to remove staged. it is not a symbolic link"
-    exit
-  fi
-else
-  echo "No staged link found"
-fi
-
-echo "Linking ${DATE} to staged"
-ln -s ${DATE} staged
-
-
-for ARCH in ${ARCHES}; do
-  pushd latest/
-  mkdir -p ${ARCH}
-  dnf --disablerepo=\* --enablerepo=RHEL-8-001 --repofrompath=RHEL-8-001,https://infrastructure.fedoraproject.org/repo/rhel/rhel8/koji/staged/${ARCH}/RHEL-8-001/ reposync -a ${ARCH} -a noarch -p ${ARCH} --newest --delete &> /dev/null
-  if [[ $? -eq 0 ]]; then
-    cd ${ARCH}/RHEL-8-001
-    createrepo_c . &> /dev/null
-  else
-    echo "Unable to run createrepo on latest/${ARCH}"
-  fi
-  popd
-done
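For orientation, a rough sketch of the daily tree the deleted script maintained (paths follow the HOMEDIR, DATE, and ARCH variables above; the date and architecture shown are examples):

    /mnt/fedora/app/fi-repo/rhel/rhel8/koji/2022-05-31/x86_64/RHEL-8-001/  # merged baseos + appstream + codeready split
    /mnt/fedora/app/fi-repo/rhel/rhel8/koji/staged -> 2022-05-31           # symlink pointing at the last good day's tree
    /mnt/fedora/app/fi-repo/rhel/rhel8/koji/latest/x86_64/RHEL-8-001/      # reposync of the newest staged packages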
diff --git a/roles/grobisplitter/files/splitter-upstream-20220531.py b/roles/grobisplitter/files/splitter-upstream-20220531.py
new file mode 100755
index 0000000000..1d0123f792
--- /dev/null
+++ b/roles/grobisplitter/files/splitter-upstream-20220531.py
@@ -0,0 +1,532 @@
+#!/bin/python3
+
+# Import libraries needed for application to work
+
+import argparse
+import shutil
+import gi
+import gzip
+import librepo
+import lzma
+import hawkey
+import tempfile
+import os
+import subprocess
+import sys
+import time
+import logging
+
+# Look for a specific version of modulemd. The 1.x series does not
+# have the tools we need.
+try:
+    gi.require_version('Modulemd', '2.0')
+    from gi.repository import Modulemd as mmd
+except ValueError:
+    print("libmodulemd 2.0 is not installed..")
+    sys.exit(1)
+
+# We only want to load the module metadata once. It can be reused as often as required
+_idx = None
+
+def _get_latest_streams(mymod, stream):
+    """
+    Routine takes modulemd object and a stream name.
+    Finds the latest stream from that and returns that as a stream
+    object.
+    """
+    all_streams = mymod.search_streams(stream, 0)
+    latest_streams = mymod.search_streams(stream,
+                                          all_streams[0].props.version)
+
+    return latest_streams
+
+
+def _get_repoinfo(directory):
+    """
+    A function which goes into the given directory and sets up the
+    needed data for the repository using librepo.
+    Returns the LRR_YUM_REPO
+    """
+    with tempfile.TemporaryDirectory(prefix='elsplit_librepo_') as lrodir:
+        h = librepo.Handle()
+        h.setopt(librepo.LRO_URLS, ["file://%s" % directory])
+        h.setopt(librepo.LRO_REPOTYPE, librepo.LR_YUMREPO)
+        h.setopt(librepo.LRO_DESTDIR, lrodir)
+        h.setopt(librepo.LRO_LOCAL, True)
+        h.setopt(librepo.LRO_IGNOREMISSING, False)
+        r = h.perform()
+        return r.getinfo(librepo.LRR_YUM_REPO)
+
+
+def _get_modulemd(directory=None, repo_info=None):
+    """
+    Retrieve the module metadata from this repository.
+    :param directory: The path to the repository. Must contain repodata/repomd.xml and modules.yaml.
+    :param repo_info: An already-acquired repo_info structure
+    :return: A Modulemd.ModulemdIndex object containing the module metadata from this repository.
+    """
+
+    # Return the cached value
+    global _idx
+    if _idx:
+        return _idx
+
+    # If we don't have a cached value, we need either directory or repo_info
+    assert directory or repo_info
+
+    if directory:
+        directory = os.path.abspath(directory)
+        repo_info = _get_repoinfo(directory)
+
+    if 'modules' not in repo_info:
+        return None
+
+    _idx = mmd.ModuleIndex.new()
+
+    myfile=repo_info['modules']
+    if myfile.endswith(".gz"):
+        openfunc=gzip.GzipFile
+    elif myfile.endswith(".xz"):
+        openfunc=lzma.LZMAFile
+    else:
+        print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
+        sys.exit(1)
+    with openfunc(filename=myfile, mode='r') as zipf:
+        mmdcts = zipf.read().decode('utf-8')
+        res, failures = _idx.update_from_string(mmdcts, True)
+        if len(failures) != 0:
+            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
+        if not res:
+            raise Exception("YAML FAILURE: res != True")
+
+    # Ensure that every stream in the index is using v2
+    _idx.upgrade_streams(mmd.ModuleStreamVersionEnum.TWO)
+
+    return _idx
+
+
+def _get_hawkey_sack(repo_info):
+    """
+    A function to pull in the repository sack from hawkey.
+    Returns the sack.
+    """
+    hk_repo = hawkey.Repo("")
+    hk_repo.filelists_fn = repo_info["filelists"]
+    hk_repo.primary_fn = repo_info["primary"]
+    hk_repo.repomd_fn = repo_info["repomd"]
+
+    primary_sack = hawkey.Sack()
+    primary_sack.load_repo(hk_repo, build_cache=False)
+
+    return primary_sack
+
+
+def _get_filelist(package_sack):
+    """
+    Determine the file locations of all packages in the sack. Use the
+    package-name-epoch-version-release-arch as the key.
+    Returns a dictionary.
+    """
+    pkg_list = {}
+    for pkg in hawkey.Query(package_sack):
+        nevr = "%s-%s:%s-%s.%s" % (pkg.name, pkg.epoch,
+                                   pkg.version, pkg.release, pkg.arch)
+        pkg_list[nevr] = pkg.location
+    return pkg_list
+
+
+def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
+    """
+    Simple routine to go through a repo, and figure out which packages
+    are not in any module. Add the file locations for those packages
+    so we can link to them.
+    Returns a set of file locations.
+    """
+    sack = package_sack
+    pkgs = set()
+
+    for pkg in hawkey.Query(sack):
+        if pkg.location in modpkgset:
+            continue
+        pkgs.add(pkg.location)
+    return pkgs
+
+
+def _parse_repository_modular(repo_info, package_sack):
+    """
+    Returns a dictionary of packages indexed by the modules they are
+    contained in.
+    """
+    cts = {}
+    idx = _get_modulemd(repo_info=repo_info)
+
+    pkgs_list = _get_filelist(package_sack)
+    idx.upgrade_streams(2)
+    for modname in idx.get_module_names():
+        mod = idx.get_module(modname)
+        for stream in mod.get_all_streams():
+            templ = list()
+            for pkg in stream.get_rpm_artifacts():
+                if pkg in pkgs_list:
+                    templ.append(pkgs_list[pkg])
+                else:
+                    continue
+            cts[stream.get_NSVCA()] = templ
+
+    return cts
+
+
+def _get_modular_pkgset(mod):
+    """
+    Takes a module and goes through the moduleset to determine which
+    packages are inside it.
+    Returns a list of packages
+    """
+    pkgs = set()
+
+    for modcts in mod.values():
+        for pkg in modcts:
+            pkgs.add(pkg)
+
+    return list(pkgs)
+
+
+def _perform_action(src, dst, action):
+    """
+    Performs either a copy, hardlink or symlink of the file src to the
+    file destination.
+    Returns None
+    """
+    if action == 'copy':
+        try:
+            shutil.copy(src, dst)
+        except FileNotFoundError:
+            # Missing files are acceptable: they're already checked before
+            # this by validate_filenames.
+            pass
+    elif action == 'hardlink':
+        os.link(src, dst)
+    elif action == 'symlink':
+        os.symlink(src, dst)
+
+
+def validate_filenames(directory, repoinfo):
+    """
+    Take a directory and repository information. Test each file in
+    repository to exist in said module. This stops us when dealing
+    with broken repositories or missing modules.
+    Returns True if no problems found. False otherwise.
+    """
+    isok = True
+    for modname in repoinfo:
+        for pkg in repoinfo[modname]:
+            if not os.path.exists(os.path.join(directory, pkg)):
+                isok = False
+                print("Path %s from mod %s did not exist" % (pkg, modname))
+    return isok
+
+
+def _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps):
+    if stream.get_NSVCA() in all_deps:
+        # We've already encountered this NSVCA, so don't go through it again
+        logging.debug('Already included {}'.format(stream.get_NSVCA()))
+        return
+
+    # Store this NSVCA/NS pair
+    local_deps = all_deps
+    local_deps.add(stream.get_NSVCA())
+
+    logging.debug("Recursive deps: {}".format(stream.get_NSVCA()))
+
+    # Loop through the dependencies for this stream
+    deps = stream.get_dependencies()
+
+    # At least one of the dependency array entries must exist in the repo
+    found_dep = False
+    for dep in deps:
+        # Within an array entry, all of the modules must be present in the
+        # index
+        found_all_modules = True
+        for modname in dep.get_runtime_modules():
+            # Ignore "platform" because it's special
+            if modname == "platform":
+                logging.debug('Skipping platform')
+                continue
+            logging.debug('Processing dependency on module {}'.format(modname))
+
+            mod = idx.get_module(modname)
+            if not mod:
+                # This module wasn't present in the index.
+                found_module = False
+                continue
+
+            # Within a module, at least one of the requested streams must be
+            # present
+            streamnames = dep.get_runtime_streams(modname)
+            found_stream = False
+            for streamname in streamnames:
+                stream_list = _get_latest_streams(mod, streamname)
+                for inner_stream in stream_list:
+                    try:
+                        _get_recursive_dependencies(
+                            local_deps, idx, inner_stream, ignore_missing_deps)
+                    except FileNotFoundError as e:
+                        # Could not find all of this stream's dependencies in
+                        # the repo
+                        continue
+                    found_stream = True
+
+            # None of the streams were found for this module
+            if not found_stream:
+                found_all_modules = False
+
+        # We've iterated through all of the modules; if it's still True, this
+        # dependency is consistent in the index
+        if found_all_modules:
+            found_dep = True
+
+    # We were unable to resolve the dependencies for any of the array entries.
+    # raise FileNotFoundError
+    if not found_dep and not ignore_missing_deps:
+        raise FileNotFoundError(
+            "Could not resolve dependencies for {}".format(
+                stream.get_NSVCA()))
+
+    all_deps.update(local_deps)
+
+
+def get_default_modules(directory, ignore_missing_deps):
+    """
+    Work through the list of modules and come up with a default set of
+    modules which would be the minimum to output.
+    Returns a set of modules
+    """
+
+    all_deps = set()
+
+    idx = _get_modulemd(directory)
+    if not idx:
+        return all_deps
+
+    for modname, streamname in idx.get_default_streams().items():
+        # Only the latest version of a stream is important, as that is the only one that DNF will consider in its
+        # transaction logic. We still need to handle each context individually.
+        mod = idx.get_module(modname)
+        stream_set = _get_latest_streams(mod, streamname)
+        for stream in stream_set:
+            # Different contexts have different dependencies
+            try:
+                logging.debug("Processing {}".format(stream.get_NSVCA()))
+                _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps)
+                logging.debug("----------")
+            except FileNotFoundError as e:
+                # Not all dependencies could be satisfied
+                print(
+                    "Not all dependencies for {} could be satisfied. {}. Skipping".format(
+                        stream.get_NSVCA(), e))
+                continue
+
+    logging.debug('Default module streams: {}'.format(all_deps))
+
+    return all_deps
+
+
+def _pad_svca(svca, target_length):
+    """
+    If the split() doesn't return all values (e.g. arch is missing), pad it
+    with `None`
+    """
+    length = len(svca)
+    svca.extend([None] * (target_length - length))
+    return svca
+
+
+def _dump_modulemd(modname, yaml_file):
+    idx = _get_modulemd()
+    assert idx
+
+    # Create a new index to hold the information about this particular
+    # module and stream
+    new_idx = mmd.ModuleIndex.new()
+
+    # Add the module streams
+    module_name, *svca = modname.split(':')
+    stream_name, version, context, arch = _pad_svca(svca, 4)
+
+    logging.debug("Dumping YAML for {}, {}, {}, {}, {}".format(
+        module_name, stream_name, version, context, arch))
+
+    mod = idx.get_module(module_name)
+    streams = mod.search_streams(stream_name, int(version), context, arch)
+
+    # This should usually be a single item, but we'll be future-compatible
+    # and account for the possibility of having multiple streams here.
+    for stream in streams:
+        new_idx.add_module_stream(stream)
+
+    # Add the module defaults
+    defs = mod.get_defaults()
+    if defs:
+        new_idx.add_defaults(defs)
+
+    # Write out the file
+    try:
+        with open(yaml_file, 'w') as output:
+            output.write(new_idx.dump_to_string())
+    except PermissionError as e:
+        logging.error("Could not write YAML to file: {}".format(e))
+        raise
+
+
+def perform_split(repos, args, def_modules):
+    for modname in repos:
+        if args.only_defaults and modname not in def_modules:
+            continue
+
+        targetdir = os.path.join(args.target, modname)
+        os.mkdir(targetdir)
+
+        for pkg in repos[modname]:
+            _, pkgfile = os.path.split(pkg)
+            _perform_action(
+                os.path.join(args.repository, pkg),
+                os.path.join(targetdir, pkgfile),
+                args.action)
+
+        # Extract the modular metadata for this module
+        if modname != 'non_modular':
+            _dump_modulemd(modname, os.path.join(targetdir, 'modules.yaml'))
+
+
+def create_repos(target, repos, def_modules, only_defaults):
+    """
+    Routine to create repositories. Input is target directory and a
+    list of repositories.
+    Returns None
+    """
+
+    for modname in repos:
+        if only_defaults and modname not in def_modules:
+            continue
+
+        targetdir = os.path.join(target, modname)
+
+        subprocess.run([
+            'createrepo_c', targetdir,
+            '--no-database'])
+        if modname != 'non_modular':
+            subprocess.run([
+                'modifyrepo_c',
+                '--mdtype=modules',
+                os.path.join(targetdir, 'modules.yaml'),
+                os.path.join(targetdir, 'repodata')
+            ])
+
+
+def parse_args():
+    """
+    A standard argument parser routine which pulls in values from the
+    command line and returns a parsed argument dictionary.
+    """
+    parser = argparse.ArgumentParser(description='Split repositories up')
+    parser.add_argument('repository', help='The repository to split')
+    parser.add_argument('--debug', help='Enable debug logging',
+                        action='store_true', default=False)
+    parser.add_argument('--action', help='Method to create split repos files',
+                        choices=('hardlink', 'symlink', 'copy'),
+                        default='hardlink')
+    parser.add_argument('--target', help='Target directory for split repos')
+    parser.add_argument('--skip-missing', help='Skip missing packages',
+                        action='store_true', default=False)
+    parser.add_argument('--create-repos', help='Create repository metadatas',
+                        action='store_true', default=False)
+    parser.add_argument('--only-defaults', help='Only output default modules',
+                        action='store_true', default=False)
+    parser.add_argument('--ignore-missing-default-deps',
+                        help='When using --only-defaults, do not skip '
+                             'default streams whose dependencies cannot be '
+                             'resolved within this repository',
+                        action='store_true', default=False)
+    return parser.parse_args()
+
+
+def setup_target(args):
+    """
+    Checks that the target directory exists and is empty. If not it
+    exits the program. Returns nothing.
+    """
+    if args.target:
+        args.target = os.path.abspath(args.target)
+        if os.path.exists(args.target):
+            if not os.path.isdir(args.target):
+                raise ValueError("Target must be a directory")
+            elif len(os.listdir(args.target)) != 0:
+                raise ValueError("Target must be empty")
+        else:
+            os.mkdir(args.target)
+
+
+def parse_repository(directory):
+    """
+    Parse a specific directory, returning a dict with keys module NSVC's and
+    values a list of package NVRs.
+    The dict will also have a key "non_modular" for the non-modular packages.
+    """
+    directory = os.path.abspath(directory)
+    repo_info = _get_repoinfo(directory)
+
+    # Get the package sack and get a filelist of all packages.
+    package_sack = _get_hawkey_sack(repo_info)
+    _get_filelist(package_sack)
+
+    # If we have a repository with no modules we do not want our
+    # script to error out but just remake the repository with
+    # everything in a known sack (aka non_modular).
+
+    if 'modules' in repo_info:
+        mod = _parse_repository_modular(repo_info, package_sack)
+        modpkgset = _get_modular_pkgset(mod)
+    else:
+        mod = dict()
+        modpkgset = set()
+
+    non_modular = _parse_repository_non_modular(package_sack, repo_info,
+                                                modpkgset)
+    mod['non_modular'] = non_modular
+
+    # We should probably go through our default modules here and
+    # remove them from our mod. This would cut down some code paths.
+
+    return mod
+
+
+def main():
+    # Determine what the arguments are and
+    args = parse_args()
+
+    if args.debug:
+        logging.basicConfig(level=logging.DEBUG)
+
+    # Go through arguments and act on their values.
+    setup_target(args)
+
+    repos = parse_repository(args.repository)
+
+    if args.only_defaults:
+        def_modules = get_default_modules(args.repository, args.ignore_missing_default_deps)
+    else:
+        def_modules = set()
+
+    def_modules.add('non_modular')
+
+    if not args.skip_missing:
+        if not validate_filenames(args.repository, repos):
+            raise ValueError("Package files were missing!")
+    if args.target:
+        perform_split(repos, args, def_modules)
+    if args.create_repos:
+        create_repos(args.target, repos, def_modules, args.only_defaults)
+
+
+if __name__ == '__main__':
+    main()
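For reference, a plausible invocation of this upstream version, using the flags defined in its parse_args() above (the repository path here is illustrative):

    ./splitter-upstream-20220531.py --action hardlink --target RHEL-8-001 \
        --only-defaults --create-repos --debug /srv/rhel8/rhel-8-for-x86_64-baseos-rpms/

Relative to the older script restored below, this version adds the --debug and --ignore-missing-default-deps switches, resolves default-stream dependencies recursively through _get_recursive_dependencies, and writes per-module modules.yaml metadata during the split.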
diff --git a/roles/grobisplitter/files/splitter.py b/roles/grobisplitter/files/splitter.py
index 1d0123f792..9b40d0e5d2 100755
--- a/roles/grobisplitter/files/splitter.py
+++ b/roles/grobisplitter/files/splitter.py
@@ -25,10 +25,9 @@ except ValueError:
     print("libmodulemd 2.0 is not installed..")
     sys.exit(1)
 
-# We only want to load the module metadata once. It can be reused as often as required
-_idx = None
-
-def _get_latest_streams(mymod, stream):
+# This code is from Stephen Gallagher to make my other caveman code
+# less icky.
+def _get_latest_streams (mymod, stream):
     """
     Routine takes modulemd object and a stream name.
     Finds the latest stream from that and returns that as a stream
@@ -57,54 +56,6 @@ def _get_repoinfo(directory):
         r = h.perform()
         return r.getinfo(librepo.LRR_YUM_REPO)
 
-
-def _get_modulemd(directory=None, repo_info=None):
-    """
-    Retrieve the module metadata from this repository.
-    :param directory: The path to the repository. Must contain repodata/repomd.xml and modules.yaml.
-    :param repo_info: An already-acquired repo_info structure
-    :return: A Modulemd.ModulemdIndex object containing the module metadata from this repository.
-    """
-
-    # Return the cached value
-    global _idx
-    if _idx:
-        return _idx
-
-    # If we don't have a cached value, we need either directory or repo_info
-    assert directory or repo_info
-
-    if directory:
-        directory = os.path.abspath(directory)
-        repo_info = _get_repoinfo(directory)
-
-    if 'modules' not in repo_info:
-        return None
-
-    _idx = mmd.ModuleIndex.new()
-
-    myfile=repo_info['modules']
-    if myfile.endswith(".gz"):
-        openfunc=gzip.GzipFile
-    elif myfile.endswith(".xz"):
-        openfunc=lzma.LZMAFile
-    else:
-        print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
-        sys.exit(1)
-    with openfunc(filename=myfile, mode='r') as zipf:
-        mmdcts = zipf.read().decode('utf-8')
-        res, failures = _idx.update_from_string(mmdcts, True)
-        if len(failures) != 0:
-            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
-        if not res:
-            raise Exception("YAML FAILURE: res != True")
-
-    # Ensure that every stream in the index is using v2
-    _idx.upgrade_streams(mmd.ModuleStreamVersionEnum.TWO)
-
-    return _idx
-
-
 def _get_hawkey_sack(repo_info):
     """
     A function to pull in the repository sack from hawkey.
@@ -158,7 +109,22 @@ def _parse_repository_modular(repo_info, package_sack):
     contained in.
     """
     cts = {}
-    idx = _get_modulemd(repo_info=repo_info)
+    idx = mmd.ModuleIndex()
+    myfile = repo_info['modules']
+    if myfile.endswith(".gz"):
+        openfunc=gzip.GzipFile
+    elif myfile.endswith(".xz"):
+        openfunc=lzma.LZMAFile
+    else:
+        print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
+        sys.exit(1)
+    with openfunc(filename=myfile, mode='r') as gzf:
+        mmdcts = gzf.read().decode('utf-8')
+        res, failures = idx.update_from_string(mmdcts, True)
+        if len(failures) != 0:
+            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
+        if not res:
+            raise Exception("YAML FAILURE: res != True")
 
     pkgs_list = _get_filelist(package_sack)
     idx.upgrade_streams(2)
@@ -226,156 +192,108 @@ def validate_filenames(directory, repoinfo):
     return isok
 
 
-def _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps):
-    if stream.get_NSVCA() in all_deps:
-        # We've already encountered this NSVCA, so don't go through it again
-        logging.debug('Already included {}'.format(stream.get_NSVCA()))
-        return
-
-    # Store this NSVCA/NS pair
-    local_deps = all_deps
-    local_deps.add(stream.get_NSVCA())
-
-    logging.debug("Recursive deps: {}".format(stream.get_NSVCA()))
-
-    # Loop through the dependencies for this stream
-    deps = stream.get_dependencies()
-
-    # At least one of the dependency array entries must exist in the repo
-    found_dep = False
-    for dep in deps:
-        # Within an array entry, all of the modules must be present in the
-        # index
-        found_all_modules = True
-        for modname in dep.get_runtime_modules():
-            # Ignore "platform" because it's special
-            if modname == "platform":
-                logging.debug('Skipping platform')
-                continue
-            logging.debug('Processing dependency on module {}'.format(modname))
-
-            mod = idx.get_module(modname)
-            if not mod:
-                # This module wasn't present in the index.
-                found_module = False
-                continue
-
-            # Within a module, at least one of the requested streams must be
-            # present
-            streamnames = dep.get_runtime_streams(modname)
-            found_stream = False
-            for streamname in streamnames:
-                stream_list = _get_latest_streams(mod, streamname)
-                for inner_stream in stream_list:
-                    try:
-                        _get_recursive_dependencies(
-                            local_deps, idx, inner_stream, ignore_missing_deps)
-                    except FileNotFoundError as e:
-                        # Could not find all of this stream's dependencies in
-                        # the repo
-                        continue
-                    found_stream = True
-
-            # None of the streams were found for this module
-            if not found_stream:
-                found_all_modules = False
-
-        # We've iterated through all of the modules; if it's still True, this
-        # dependency is consistent in the index
-        if found_all_modules:
-            found_dep = True
-
-    # We were unable to resolve the dependencies for any of the array entries.
-    # raise FileNotFoundError
-    if not found_dep and not ignore_missing_deps:
-        raise FileNotFoundError(
-            "Could not resolve dependencies for {}".format(
-                stream.get_NSVCA()))
-
-    all_deps.update(local_deps)
-
-
-def get_default_modules(directory, ignore_missing_deps):
+def get_default_modules(directory):
     """
     Work through the list of modules and come up with a default set of
     modules which would be the minimum to output.
     Returns a set of modules
     """
+    directory = os.path.abspath(directory)
+    repo_info = _get_repoinfo(directory)
 
-    all_deps = set()
+    provides = set()
+    contents = set()
+    if 'modules' not in repo_info:
+        return contents
+    idx = mmd.ModuleIndex()
+    myfile=repo_info['modules']
+    if myfile.endswith(".gz"):
+        openfunc=gzip.GzipFile
+    elif myfile.endswith(".xz"):
+        openfunc=lzma.LZMAFile
+    else:
+        print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
+        sys.exit(1)
+    with openfunc(filename=myfile, mode='r') as gzf:
+        mmdcts = gzf.read().decode('utf-8')
+        res, failures = idx.update_from_string(mmdcts, True)
+        if len(failures) != 0:
+            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
+        if not res:
+            raise Exception("YAML FAILURE: res != True")
 
-    idx = _get_modulemd(directory)
-    if not idx:
-        return all_deps
+    idx.upgrade_streams(2)
 
-    for modname, streamname in idx.get_default_streams().items():
-        # Only the latest version of a stream is important, as that is the only one that DNF will consider in its
-        # transaction logic. We still need to handle each context individually.
+    # OK this is cave-man no-sleep programming. I expect there is a
+    # better way to do this that would be a lot better. However after
+    # a long long day.. this is what I have.
+
+    # First we go through the default streams and create a set of
+    # provides that we can check against later.
+    for modname in idx.get_default_streams():
         mod = idx.get_module(modname)
-        stream_set = _get_latest_streams(mod, streamname)
+        # Get the default streams and loop through them.
+        stream_set = mod.get_streams_by_stream_name(
+            mod.get_defaults().get_default_stream())
         for stream in stream_set:
-            # Different contexts have different dependencies
-            try:
-                logging.debug("Processing {}".format(stream.get_NSVCA()))
-                _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps)
-                logging.debug("----------")
-            except FileNotFoundError as e:
-                # Not all dependencies could be satisfied
-                print(
-                    "Not all dependencies for {} could be satisfied. {}. Skipping".format(
-                        stream.get_NSVCA(), e))
-                continue
-
-    logging.debug('Default module streams: {}'.format(all_deps))
-
-    return all_deps
+            tempstr = "%s:%s" % (stream.props.module_name,
+                                 stream.props.stream_name)
+            provides.add(tempstr)
 
-
-def _pad_svca(svca, target_length):
-    """
-    If the split() doesn't return all values (e.g. arch is missing), pad it
-    with `None`
-    """
-    length = len(svca)
-    svca.extend([None] * (target_length - length))
-    return svca
+    # Now go through our list and build up a content lists which will
+    # have only modules which have their dependencies met
+    tempdict = {}
+    for modname in idx.get_default_streams():
+        mod = idx.get_module(modname)
+        # Get the default streams and loop through them.
+        # This is a sorted list with the latest in it. We could drop
+        # looking at later ones here in a future version. (aka lines
+        # 237 to later)
+        stream_set = mod.get_streams_by_stream_name(
+            mod.get_defaults().get_default_stream())
+        for stream in stream_set:
+            ourname = stream.get_NSVCA()
+            tmp_name = "%s:%s" % (stream.props.module_name,
+                                  stream.props.stream_name)
+            # Get dependencies is a list of items. All of the modules
+            # seem to only have 1 item in them, but we should loop
+            # over the list anyway.
+            for deps in stream.get_dependencies():
+                isprovided = True  # a variable to say this can be added.
+                for mod in deps.get_runtime_modules():
+                    tempstr=""
+                    # It does not seem easy to figure out what the
+                    # platform is so just assume we will meet it.
+                    if mod != 'platform':
+                        for stm in deps.get_runtime_streams(mod):
+                            tempstr = "%s:%s" %(mod,stm)
+                        if tempstr not in provides:
+                            # print( "%s : %s not found." % (ourname,tempstr))
+                            isprovided = False
+                if isprovided:
+                    if tmp_name in tempdict:
+                        # print("We found %s" % tmp_name)
+                        # Get the stream version we are looking at
+                        ts1=ourname.split(":")[2]
+                        # Get the stream version we stored away
+                        ts2=tempdict[tmp_name].split(":")[2]
+                        # See if we got a newer one. We probably
+                        # don't as it is a sorted list but we
+                        # could have multiple contexts which would
+                        # change things.
+                        if ( int(ts1) > int(ts2) ):
+                            # print ("%s > %s newer for %s", ts1,ts2,ourname)
+                            tempdict[tmp_name] = ourname
+                    else:
+                        # print("We did not find %s" % tmp_name)
+                        tempdict[tmp_name] = ourname
+    # OK we finally got all our stream names we want to send back to
+    # our calling function. Read them out and add them to the set.
+    for indx in tempdict:
+        contents.add(tempdict[indx])
 
-
-def _dump_modulemd(modname, yaml_file):
-    idx = _get_modulemd()
-    assert idx
-
-    # Create a new index to hold the information about this particular
-    # module and stream
-    new_idx = mmd.ModuleIndex.new()
-
-    # Add the module streams
-    module_name, *svca = modname.split(':')
-    stream_name, version, context, arch = _pad_svca(svca, 4)
-
-    logging.debug("Dumping YAML for {}, {}, {}, {}, {}".format(
-        module_name, stream_name, version, context, arch))
-
-    mod = idx.get_module(module_name)
-    streams = mod.search_streams(stream_name, int(version), context, arch)
-
-    # This should usually be a single item, but we'll be future-compatible
-    # and account for the possibility of having multiple streams here.
-    for stream in streams:
-        new_idx.add_module_stream(stream)
-
-    # Add the module defaults
-    defs = mod.get_defaults()
-    if defs:
-        new_idx.add_defaults(defs)
-
-    # Write out the file
-    try:
-        with open(yaml_file, 'w') as output:
-            output.write(new_idx.dump_to_string())
-    except PermissionError as e:
-        logging.error("Could not write YAML to file: {}".format(e))
-        raise
+    return contents
 
 
 def perform_split(repos, args, def_modules):
@@ -393,10 +311,6 @@
                 os.path.join(targetdir, pkgfile),
                 args.action)
 
-        # Extract the modular metadata for this module
-        if modname != 'non_modular':
-            _dump_modulemd(modname, os.path.join(targetdir, 'modules.yaml'))
-
 
 def create_repos(target, repos, def_modules, only_defaults):
     """
@@ -408,19 +322,9 @@
     for modname in repos:
         if only_defaults and modname not in def_modules:
             continue
-
-        targetdir = os.path.join(target, modname)
-
         subprocess.run([
-            'createrepo_c', targetdir,
+            'createrepo_c', os.path.join(target, modname),
             '--no-database'])
-        if modname != 'non_modular':
-            subprocess.run([
-                'modifyrepo_c',
-                '--mdtype=modules',
-                os.path.join(targetdir, 'modules.yaml'),
-                os.path.join(targetdir, 'repodata')
-            ])
 
 
 def parse_args():
@@ -430,8 +334,6 @@
     """
     parser = argparse.ArgumentParser(description='Split repositories up')
     parser.add_argument('repository', help='The repository to split')
-    parser.add_argument('--debug', help='Enable debug logging',
-                        action='store_true', default=False)
     parser.add_argument('--action', help='Method to create split repos files',
                         choices=('hardlink', 'symlink', 'copy'),
                         default='hardlink')
@@ -442,11 +344,6 @@
                         action='store_true', default=False)
     parser.add_argument('--only-defaults', help='Only output default modules',
                         action='store_true', default=False)
-    parser.add_argument('--ignore-missing-default-deps',
-                        help='When using --only-defaults, do not skip '
-                             'default streams whose dependencies cannot be '
-                             'resolved within this repository',
-                        action='store_true', default=False)
     return parser.parse_args()
 
 
@@ -504,16 +401,13 @@
 def main():
     # Determine what the arguments are and
     args = parse_args()
 
-    if args.debug:
-        logging.basicConfig(level=logging.DEBUG)
-
     # Go through arguments and act on their values.
     setup_target(args)
 
     repos = parse_repository(args.repository)
 
     if args.only_defaults:
-        def_modules = get_default_modules(args.repository, args.ignore_missing_default_deps)
+        def_modules = get_default_modules(args.repository)
     else:
         def_modules = set()
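With the revert applied, splitter.py again matches the interface the removed rhel8-split.sh called; a representative invocation (repository path again illustrative) is:

    ./splitter.py --action hardlink --target RHEL-8-001 /srv/rhel8/rhel-8-for-x86_64-baseos-rpms/ --only-defaults

The reverted code drops --debug and --ignore-missing-default-deps, replaces the recursive dependency walk with a simpler provides-set comparison of default streams, and no longer emits modules.yaml or runs modifyrepo_c on the split trees.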