Reverting part of the previous update to splitter.py from the Fedora

Modularity tree. That code is not functional and causes the wrong packages to be pulled into the split repositories. Signed-off-by: Stephen Smoogen <ssmoogen@redhat.com>
2020-12-09 09:18:48 -05:00 · 2020-12-09 09:18:48 -05:00 · 741bf4d49a
commit 741bf4d49a
parent fcf6b1a34d
1 changed files with 123 additions and 260 deletions
--- a/roles/grobisplitter/files/splitter.py
+++ b/roles/grobisplitter/files/splitter.py
@ -12,33 +12,32 @@ import tempfile
 import os
 import subprocess
 import sys
 import logging
 # Look for a specific version of modulemd. The 1.x series does not
 # have the tools we need.
 try:
    gi.require_version('Modulemd', '2.0')
-    from gi.repository import Modulemd as mmd
+    from gi.repository import Modulemd
-except ValueError:
+except:
-    print("libmodulemd 2.0 is not installed..")
+    print("We require newer vesions of modulemd than installed..")
-    sys.exit(1)
+    sys.exit(0)
 mmd = Modulemd
-# We only want to load the module metadata once. It can be reused as often as required
+# This code is from Stephen Gallagher to make my other caveman code
-_idx = None
+# less icky.
-
+def _get_latest_streams (mymod, stream):
 def _get_latest_streams(mymod, stream):
    """
    Routine takes modulemd object and a stream name.
    Finds the lates stream from that and returns that as a stream
-    object.
+    object. 
    """
    all_streams = mymod.search_streams(stream, 0)
    latest_streams = mymod.search_streams(stream,
-                                          all_streams[0].props.version)
+                                          all_streams[0].props.version) 
-
+    
    return latest_streams
-
+    
 def _get_repoinfo(directory):
    """
    A function which goes into the given directory and sets up the
@ -55,46 +54,6 @@ def _get_repoinfo(directory):
        r = h.perform()
        return r.getinfo(librepo.LRR_YUM_REPO)
 def _get_modulemd(directory=None, repo_info=None):
    """
    Retrieve the module metadata from this repository.
    :param directory: The path to the repository. Must contain repodata/repomd.xml and modules.yaml.
    :param repo_info: An already-acquired repo_info structure
    :return: A Modulemd.ModulemdIndex object containing the module metadata from this repository.
    """
    # Return the cached value
    global _idx
    if _idx:
        return _idx
    # If we don't have a cached value, we need either directory or repo_info
    assert directory or repo_info
    if directory:
        directory = os.path.abspath(directory)
        repo_info = _get_repoinfo(directory)
    if 'modules' not in repo_info:
        return None
    _idx = mmd.ModuleIndex.new()
    with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf:
        mmdcts = gzf.read().decode('utf-8')
        res, failures = _idx.update_from_string(mmdcts, True)
        if len(failures) != 0:
            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
        if not res:
            raise Exception("YAML FAILURE: res != True")
    # Ensure that every stream in the index is using v2
    _idx.upgrade_streams(mmd.ModuleStreamVersionEnum.TWO)
    return _idx
 def _get_hawkey_sack(repo_info):
    """
    A function to pull in the repository sack from hawkey.
@ -107,10 +66,9 @@ def _get_hawkey_sack(repo_info):
    primary_sack = hawkey.Sack()
    primary_sack.load_repo(hk_repo, build_cache=False)
-
+    
    return primary_sack
 def _get_filelist(package_sack):
    """
    Determine the file locations of all packages in the sack. Use the
@ -119,12 +77,10 @@ def _get_filelist(package_sack):
    """
    pkg_list = {}
    for pkg in hawkey.Query(package_sack):
-        nevr = "%s-%s:%s-%s.%s" % (pkg.name, pkg.epoch,
+        nevr="%s-%s:%s-%s.%s"% (pkg.name,pkg.epoch,pkg.version,pkg.release,pkg.arch)
                                   pkg.version, pkg.release, pkg.arch)
        pkg_list[nevr] = pkg.location
    return pkg_list
 def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
    """
    Simple routine to go through a repo, and figure out which packages
@ -141,14 +97,20 @@ def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
        pkgs.add(pkg.location)
    return pkgs
-
+def _parse_repository_modular(repo_info,package_sack):
 def _parse_repository_modular(repo_info, package_sack):
    """
    Returns a dictionary of packages indexed by the modules they are
    contained in.
    """
    cts = {}
-    idx = _get_modulemd(repo_info=repo_info)
+    idx = mmd.ModuleIndex()
    with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf:
        mmdcts = gzf.read().decode('utf-8')
        res, failures = idx.update_from_string(mmdcts, True)
        if len(failures) != 0:
            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
        if not res:
            raise Exception("YAML FAILURE: res != True")
    pkgs_list = _get_filelist(package_sack)
    idx.upgrade_streams(2)
@ -162,14 +124,14 @@ def _parse_repository_modular(repo_info, package_sack):
                else:
                    continue
            cts[stream.get_NSVCA()] = templ
-
+                
    return cts
 def _get_modular_pkgset(mod):
    """
    Takes a module and goes through the moduleset to determine which
-    packages are inside it.
+    packages are inside it. 
    Returns a list of packages
    """
    pkgs = set()
@ -180,7 +142,6 @@ def _get_modular_pkgset(mod):
    return list(pkgs)
 def _perform_action(src, dst, action):
    """
    Performs either a copy, hardlink or symlink of the file src to the
@ -199,7 +160,6 @@ def _perform_action(src, dst, action):
    elif action == 'symlink':
        os.symlink(src, dst)
 def validate_filenames(directory, repoinfo):
    """
    Take a directory and repository information. Test each file in
@ -216,176 +176,107 @@ def validate_filenames(directory, repoinfo):
    return isok
-def _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps):
+def get_default_modules(directory):
    if stream.get_NSVCA() in all_deps:
        # We've already encountered this NSVCA, so don't go through it again
        logging.debug('Already included {}'.format(stream.get_NSVCA()))
        return
    # Store this NSVCA/NS pair
    local_deps = all_deps
    local_deps.add(stream.get_NSVCA())
    logging.debug("Recursive deps: {}".format(stream.get_NSVCA()))
    # Loop through the dependencies for this stream
    deps = stream.get_dependencies()
    # At least one of the dependency array entries must exist in the repo
    found_dep = False
    for dep in deps:
        # Within an array entry, all of the modules must be present in the
        # index
        found_all_modules = True
        for modname in dep.get_runtime_modules():
            # Ignore "platform" because it's special
            if modname == "platform":
                logging.debug('Skipping platform')
                continue
            logging.debug('Processing dependency on module {}'.format(modname))
            mod = idx.get_module(modname)
            if not mod:
                # This module wasn't present in the index.
                found_module = False
                continue
            # Within a module, at least one of the requested streams must be
            # present
            streamnames = dep.get_runtime_streams(modname)
            found_stream = False
            for streamname in streamnames:
                stream_list = _get_latest_streams(mod, streamname)
                for inner_stream in stream_list:
                    try:
                        _get_recursive_dependencies(
                            local_deps, idx, inner_stream, ignore_missing_deps)
                    except FileNotFoundError as e:
                        # Could not find all of this stream's dependencies in
                        # the repo
                        continue
                    found_stream = True
            # None of the streams were found for this module
            if not found_stream:
                found_all_modules = False
        # We've iterated through all of the modules; if it's still True, this
        # dependency is consistent in the index
        if found_all_modules:
            found_dep = True
    # We were unable to resolve the dependencies for any of the array entries.
    # raise FileNotFoundError
    if not found_dep and not ignore_missing_deps:
        raise FileNotFoundError(
            "Could not resolve dependencies for {}".format(
                stream.get_NSVCA()))
    all_deps.update(local_deps)
 def get_default_modules(directory, ignore_missing_deps):
    """
    Work through the list of modules and come up with a default set of
-    modules which would be the minimum to output.
+    modules which would be the minimum to output. 
-    Returns a set of modules
+    Returns a set of modules 
    """
    directory = os.path.abspath(directory)
    repo_info = _get_repoinfo(directory)
-    all_deps = set()
+    provides = set()
    contents = set()
    if 'modules' not in repo_info:
        return contents
    idx = mmd.ModuleIndex()
    with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf:
        mmdcts = gzf.read().decode('utf-8')
        res, failures = idx.update_from_string(mmdcts, True)
        if len(failures) != 0:
            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
        if not res:
            raise Exception("YAML FAILURE: res != True")
-    idx = _get_modulemd(directory)
+    idx.upgrade_streams(2)
    if not idx:
        return all_deps
-    for modname, streamname in idx.get_default_streams().items():
+    # OK this is cave-man no-sleep programming. I expect there is a
-        # Only the latest version of a stream is important, as that is the only one that DNF will consider in its
+    # better way to do this that would be a lot better. However after
-        # transaction logic. We still need to handle each context individually.
+    # a long long day.. this is what I have.
    # First we oo through the default streams and create a set of
    # provides that we can check against later.
    for modname in idx.get_default_streams():
        mod = idx.get_module(modname)
-        stream_set = _get_latest_streams(mod, streamname)
+        # Get the default streams and loop through them.
        stream_set = mod.get_streams_by_stream_name(
            mod.get_defaults().get_default_stream())
        for stream in stream_set:
-            # Different contexts have different dependencies
+            tempstr = "%s:%s" % (stream.props.module_name,
-            try:
+                                 stream.props.stream_name)
-                logging.debug("Processing {}".format(stream.get_NSVCA()))
+            provides.add(tempstr)
                _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps)
                logging.debug("----------")
            except FileNotFoundError as e:
                # Not all dependencies could be satisfied
                print(
                    "Not all dependencies for {} could be satisfied. {}. Skipping".format(
                        stream.get_NSVCA(), e))
                continue
    logging.debug('Default module streams: {}'.format(all_deps))
    return all_deps
-def _pad_svca(svca, target_length):
+    # Now go through our list and build up a content lists which will
-    """
+    # have only modules which have their dependencies met
-    If the split() doesn't return all values (e.g. arch is missing), pad it
+    tempdict = {}
-    with `None`
+    for modname in idx.get_default_streams():
-    """
+        mod = idx.get_module(modname)
-    length = len(svca)
+        # Get the default streams and loop through them.
-    svca.extend([None] * (target_length - length))
+        # This is a sorted list with the latest in it. We could drop
-    return svca
+        # looking at later ones here in a future version. (aka lines
        # 237 to later)
        stream_set = mod.get_streams_by_stream_name(
            mod.get_defaults().get_default_stream())
        for stream in stream_set:
            ourname = stream.get_NSVCA()
            tmp_name = "%s:%s" % (stream.props.module_name,
                                 stream.props.stream_name)
            # Get dependencies is a list of items. All of the modules
            # seem to only have 1 item in them, but we should loop
            # over the list anyway.
            for deps in stream.get_dependencies():
                isprovided = True # a variable to say this can be added.
                for mod in deps.get_runtime_modules():
                    tempstr=""
                    # It does not seem easy to figure out what the
                    # platform is so just assume we will meet it.
                    if mod != 'platform':
                        for stm in deps.get_runtime_streams(mod):
                            tempstr = "%s:%s" %(mod,stm)
                            if tempstr not in provides:
                                # print( "%s : %s not found." % (ourname,tempstr))
                                isprovided = False
                    if isprovided:
                        if tmp_name in tempdict:
                            # print("We found %s" % tmp_name)
                            # Get the stream version we are looking at
                            ts1=ourname.split(":")[2]
                            # Get the stream version we stored away
                            ts2=tempdict[tmp_name].split(":")[2]
                            # See if we got a newer one. We probably
                            # don't as it is a sorted list but we
                            # could have multiple contexts which would
                            # change things.
                            if ( int(ts1) > int(ts2) ):
                                # print ("%s > %s newer for %s", ts1,ts2,ourname)
                                tempdict[tmp_name] = ourname
                        else:
                            # print("We did not find %s" % tmp_name)
                            tempdict[tmp_name] = ourname
    # OK we finally got all our stream names we want to send back to
    # our calling function. Read them out and add them to the set.
    for indx in tempdict:
        contents.add(tempdict[indx])
-
+    return contents
 def _dump_modulemd(modname, yaml_file):
    idx = _get_modulemd()
    assert idx
    # Create a new index to hold the information about this particular
    # module and stream
    new_idx = mmd.ModuleIndex.new()
    # Add the module streams
    module_name, *svca = modname.split(':')
    stream_name, version, context, arch = _pad_svca(svca, 4)
    logging.debug("Dumping YAML for {}, {}, {}, {}, {}".format(
        module_name, stream_name, version, context, arch))
    mod = idx.get_module(module_name)
    streams = mod.search_streams(stream_name, int(version), context, arch)
    # This should usually be a single item, but we'll be future-compatible
    # and account for the possibility of having multiple streams here.
    for stream in streams:
        new_idx.add_module_stream(stream)
    # Add the module defaults
    defs = mod.get_defaults()
    if defs:
        new_idx.add_defaults(defs)
    ## This is in the upstream but does not work in 2.9. commented out
    # # libmodulemd doesn't currently expose the get_translation()
    # # function, but that will be added in 2.8.0
    # try:
    #     # Add the translation object
    #     translation = mod.get_translation()
    #     if translation:
    #         new_idx.add_translation(translation)
    # except AttributeError as e:
    #     # This version of libmodulemd does not yet support this function.
    #     # Just ignore it.
    #     pass
    # Write out the file
    try:
        with open(yaml_file, 'w') as output:
            output.write(new_idx.dump_to_string())
    except PermissionError as e:
        logging.error("Could not write YAML to file: {}".format(e))
        raise
 def perform_split(repos, args, def_modules):
    for modname in repos:
        if args.only_defaults and modname not in def_modules:
            continue
-
+        
        targetdir = os.path.join(args.target, modname)
        os.mkdir(targetdir)
@ -396,12 +287,8 @@ def perform_split(repos, args, def_modules):
                os.path.join(targetdir, pkgfile),
                args.action)
        # Extract the modular metadata for this module
        if modname != 'non_modular':
            _dump_modulemd(modname, os.path.join(targetdir, 'modules.yaml'))
-
+def create_repos(target, repos,def_modules, only_defaults):
 def create_repos(target, repos, def_modules, only_defaults):
    """
    Routine to create repositories. Input is target directory and a
    list of repositories.
@ -410,19 +297,9 @@ def create_repos(target, repos, def_modules, only_defaults):
    for modname in repos:
        if only_defaults and modname not in def_modules:
            continue
        targetdir = os.path.join(target, modname)
        subprocess.run([
-            'createrepo_c', targetdir,
+            'createrepo_c', os.path.join(target, modname),
            '--no-database'])
        if modname != 'non_modular':
            subprocess.run([
                'modifyrepo_c',
                '--mdtype=modules',
                os.path.join(targetdir, 'modules.yaml'),
                os.path.join(targetdir, 'repodata')
            ])
 def parse_args():
@ -432,8 +309,6 @@ def parse_args():
    """
    parser = argparse.ArgumentParser(description='Split repositories up')
    parser.add_argument('repository', help='The repository to split')
    parser.add_argument('--debug', help='Enable debug logging',
                        action='store_true', default=False)
    parser.add_argument('--action', help='Method to create split repos files',
                        choices=('hardlink', 'symlink', 'copy'),
                        default='hardlink')
@ -444,11 +319,6 @@ def parse_args():
                        action='store_true', default=False)
    parser.add_argument('--only-defaults', help='Only output default modules',
                        action='store_true', default=False)
    parser.add_argument('--ignore-missing-default-deps',
                        help='When using --only-defaults, do not skip '
                             'default streams whose dependencies cannot be '
                             'resolved within this repository',
                        action='store_true', default=False)
    return parser.parse_args()
@ -467,7 +337,6 @@ def setup_target(args):
        else:
            os.mkdir(args.target)
 def parse_repository(directory):
    """
    Parse a specific directory, returning a dict with keys module NSVC's and
@ -484,51 +353,45 @@ def parse_repository(directory):
    # If we have a repository with no modules we do not want our
    # script to error out but just remake the repository with
    # everything in a known sack (aka non_modular).
-
+     
    if 'modules' in repo_info:
-        mod = _parse_repository_modular(repo_info, package_sack)
+        mod = _parse_repository_modular(repo_info,package_sack)
        modpkgset = _get_modular_pkgset(mod)
    else:
        mod = dict()
        modpkgset = set()
-    non_modular = _parse_repository_non_modular(package_sack, repo_info,
+    non_modular = _parse_repository_non_modular(package_sack,repo_info, 
-                                                modpkgset)
+                                  modpkgset) 
    mod['non_modular'] = non_modular
-    # We should probably go through our default modules here and
+    ## We should probably go through our default modules here and
-    # remove them from our mod. This would cut down some code paths.
+    ## remove them from our mod. This would cut down some code paths.
    return mod
 def main():
-    # Determine what the arguments are and
+    # Determine what the arguments are and 
    args = parse_args()
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    # Go through arguments and act on their values.
    setup_target(args)
    repos = parse_repository(args.repository)
    if args.only_defaults:
-        def_modules = get_default_modules(args.repository, args.ignore_missing_default_deps)
+        def_modules = get_default_modules(args.repository)
    else:
        def_modules = set()
-
+    def_modules.add('non_modular')        
-    def_modules.add('non_modular')
+    
    if not args.skip_missing:
        if not validate_filenames(args.repository, repos):
            raise ValueError("Package files were missing!")
    if args.target:
        perform_split(repos, args, def_modules)
        if args.create_repos:
-            create_repos(args.target, repos, def_modules, args.only_defaults)
+            create_repos(args.target, repos,def_modules,args.only_defaults)
 if __name__ == '__main__':
    main()