Changes: Updates splitter.py to the version in the upstream modules git repository.

Removes the two-stage process for building repositories.
Sets all the files to the same timestamp.

Signed-off-by: Stephen Smoogen <ssmoogen@redhat.com>
Stephen Smoogen, 2022-05-27 16:23:40 -04:00 (committed by smooge)
parent 7c77f430dd
commit 824654b955
3 changed files with 290 additions and 218 deletions
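
Taken together, the scripts now copy packages into the split trees instead of hardlinking them, stamp every file in each tree with the same mtime, and point the latest symlink straight at the day's compose instead of staging and reposyncing a second copy. A minimal sketch of the per-tree timestamp step, assuming the current directory is one of the split targets such as CS-8-001:

    touch timestamp
    find . -type f -print | xargs touch -r timestamp   # give every file the same mtime
    createrepo_c . &> /dev/null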

View file

@@ -41,22 +41,22 @@ for ARCH in ${ARCHES}; do
fi
# Begin splitting the various packages into their subtrees
${BINDIR}/splitter.py --action hardlink --target CS-8-001 ${HOMEDIR}/BaseOS/${ARCH}/os/ --only-defaults &> /dev/null
${BINDIR}/splitter.py --action copy --target CS-8-001 ${HOMEDIR}/BaseOS/${ARCH}/os/ --only-defaults &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} baseos failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target CS-8-002 ${HOMEDIR}/AppStream/${ARCH}/os/ --only-defaults &> /dev/null
${BINDIR}/splitter.py --action copy --target CS-8-002 ${HOMEDIR}/AppStream/${ARCH}/os/ --only-defaults &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} appstream failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target CS-8-003 ${HOMEDIR}/PowerTools/${ARCH}/os/ &> /dev/null
${BINDIR}/splitter.py --action copy --target CS-8-003 ${HOMEDIR}/PowerTools/${ARCH}/os/ &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} codeready failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target CS-8-004 ${HOMEDIR}/Devel/${ARCH}/os/ &> /dev/null
${BINDIR}/splitter.py --action copy --target CS-8-004 ${HOMEDIR}/Devel/${ARCH}/os/ &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} devel failed"
exit
@@ -71,6 +71,8 @@ for ARCH in ${ARCHES}; do
# Go into the main tree
pushd CS-8-001
touch timestamp
find . -type f -print | xargs touch -r timestamp
# Mergerepo didn't work so lets just createrepo in the top directory.
createrepo_c . &> /dev/null
popd
@@ -82,35 +84,26 @@ done
## Set up the builds so they are pointing to the last working version
cd ${KOJIDIR}
if [[ -e staged ]]; then
if [[ -h staged ]]; then
rm -f staged
if [[ -e latest ]]; then
if [[ -h latest ]]; then
rm -f latest
else
echo "Unable to remove staged. it is not a symbolic link"
echo "Unable to remove staged. it is not a symbolic link. Trying to move to latest_${DATE}."
if [[ -d latest_${DATE} ]]; then
echo "latest_${DATE} exists. Exiting"
exit
else
mv latest latest_${DATE}
fi
fi
else
echo "No staged link found"
echo "No latest link found"
fi
echo "Linking ${DATE} to staged"
ln -s ${DATE} staged
echo "Linking ${DATE} to latest"
ln -s ${DATE} latest
for ARCH in ${ARCHES}; do
if [[ -d latest/ ]]; then
pushd latest/
else
mkdir latest/
pushd latest/
fi
mkdir -p ${ARCH}
dnf --disablerepo=\* --enablerepo=CS-8-001 --repofrompath=CS-8-001,https://infrastructure.fedoraproject.org/repo/centos/stream8-kojitarget/staged/${ARCH}/CS-8-001/ reposync -a ${ARCH} -a noarch -p ${ARCH} --newest --delete &> /dev/null
if [[ $? -eq 0 ]]; then
cd ${ARCH}/CS-8-001
createrepo_c . &> /dev/null
else
echo "Unable to run createrepo on latest/${ARCH}"
fi
popd
done
## Wish there was a clean way to tell koji to figure out the new repos
## from batcave.
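
Condensed, the latest handling that replaces the old staged link comes down to the following, assuming KOJIDIR and DATE are set earlier in the script:

    cd ${KOJIDIR}
    if [[ -e latest ]]; then
        if [[ -h latest ]]; then
            rm -f latest                  # normal case: latest is a symlink, drop it
        elif [[ -d latest_${DATE} ]]; then
            echo "latest_${DATE} exists. Exiting"
            exit
        else
            mv latest latest_${DATE}      # keep a real directory out of the way
        fi
    fi
    ln -s ${DATE} latest                  # point latest at today's compose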

View file

@@ -21,15 +21,7 @@ mkdir -p ${DATEDIR}
fi
##
## Remove the old latest
rm -rf ${HOMEDIR}/koji/latest_old/
if [ $? -ne 0 ]; then
echo "removal of old latest failed"
exit
fi
##
## Go through each architecture and
## Go through each architecture and split out the trees.
##
for ARCH in ${ARCHES}; do
# The archdir is where we daily download updates for rhel8
@@ -52,17 +44,17 @@ for ARCH in ${ARCHES}; do
fi
# Begin splitting the various packages into their subtrees
${BINDIR}/splitter.py --action hardlink --target RHEL-8-001 ${ARCHDIR}/rhel-8-for-${ARCH}-baseos-rpms/ --only-defaults &> /dev/null
${BINDIR}/splitter.py --action copy --target RHEL-8-001 ${ARCHDIR}/rhel-8-for-${ARCH}-baseos-rpms/ --only-defaults &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} baseos failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target RHEL-8-002 ${ARCHDIR}/rhel-8-for-${ARCH}-appstream-rpms/ --only-defaults &> /dev/null
${BINDIR}/splitter.py --action copy --target RHEL-8-002 ${ARCHDIR}/rhel-8-for-${ARCH}-appstream-rpms/ --only-defaults &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} appstream failed"
exit
fi
${BINDIR}/splitter.py --action hardlink --target RHEL-8-003 ${ARCHDIR}/codeready-builder-for-rhel-8-${ARCH}-rpms/ &> /dev/null
${BINDIR}/splitter.py --action copy --target RHEL-8-003 ${ARCHDIR}/codeready-builder-for-rhel-8-${ARCH}-rpms/ &> /dev/null
if [ $? -ne 0 ]; then
echo "splitter ${ARCH} codeready failed"
exit
@@ -76,6 +68,8 @@ for ARCH in ${ARCHES}; do
# Go into the main tree
pushd RHEL-8-001
touch timestamp
find . -type f -print | xargs touch -r timestamp
# Mergerepo didn't work so lets just createrepo in the top directory.
createrepo_c . &> /dev/null
popd
@@ -87,50 +81,25 @@ done
## Set up the builds so they are pointing to the last working version
cd ${HOMEDIR}/koji/
if [[ -e staged ]]; then
if [[ -h staged ]]; then
rm -f staged
if [[ -e latest ]]; then
if [[ -h latest ]]; then
rm -f latest
else
echo "Unable to remove staged. it is not a symbolic link"
echo "Unable to remove staged. it is not a symbolic link. Trying to move to latest_${DATE}."
if [[ -d latest_${DATE} ]]; then
echo "latest_${DATE} exists. Exiting"
exit
else
mv latest latest_${DATE}
fi
fi
else
echo "No staged link found"
echo "No latest link found"
fi
####
#### The following is overly complicated and makes thinking and
#### debugging hard. This needs to be fixed.
echo "Linking ${DATE} to latest"
ln -s ${DATE} latest
## The goal here is to take the staged code, and make a new repo with
## just the latest amount of rpms in it. We also want to try and cut
## the race condition down where koji sees one 'RHEL-8-001' with X.Y.Z
## rpms and then sees it with A.B.C or some mix.
# FIXME: Do we really need to make this linked staged?
echo "Linking ${DATE} to staged"
ln -s ${DATE} staged
NEW_LATEST=latest-${DATE}
mkdir -p ${NEW_LATEST}
# Go through each architecture
for ARCH in ${ARCHES}; do
# The following is overly complicated and needs to be cleaner.
pushd ${NEW_LATEST}
mkdir -p ${NEW_LATEST}/${ARCH}
dnf --disablerepo=\* --enablerepo=RHEL-8-001 --repofrompath=RHEL-8-001,https://infrastructure.fedoraproject.org/repo/rhel/rhel8/koji/staged/${ARCH}/RHEL-8-001/ reposync -a ${ARCH} -a noarch -p ${ARCH} --newest --delete &> /dev/null
if [[ $? -eq 0 ]]; then
cd ${ARCH}/RHEL-8-001
createrepo_c . &> /dev/null
else
echo "Unable to run createrepo on latest/${ARCH}"
fi
popd
done
## RACE CONDITION TIME!!!!
mv latest latest_old
mv ${NEW_LATEST} latest
## Wish there was a clean way to tell koji to figure out the new repos
## from batcave.

View file
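
Both compose scripts drive splitter.py purely through its command line, so the options added below can be exercised directly. A hypothetical invocation against a BaseOS tree (paths are placeholders):

    ${BINDIR}/splitter.py --action copy --target CS-8-001 \
        --only-defaults --ignore-missing-default-deps --debug \
        ${HOMEDIR}/BaseOS/x86_64/os/

Here --action copy is what the scripts now use instead of hardlink, --only-defaults restricts the output to default module streams, --ignore-missing-default-deps keeps default streams whose dependencies cannot be resolved within the repository instead of skipping them, and --debug enables the new debug logging.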

@@ -13,21 +13,22 @@ import tempfile
import os
import subprocess
import sys
import time
import logging
# Look for a specific version of modulemd. The 1.x series does not
# have the tools we need.
try:
gi.require_version('Modulemd', '2.0')
from gi.repository import Modulemd
except:
print("We require newer vesions of modulemd than installed..")
sys.exit(0)
from gi.repository import Modulemd as mmd
except ValueError:
print("libmodulemd 2.0 is not installed..")
sys.exit(1)
mmd = Modulemd
# We only want to load the module metadata once. It can be reused as often as required
_idx = None
# This code is from Stephen Gallagher to make my other caveman code
# less icky.
def _get_latest_streams (mymod, stream):
def _get_latest_streams(mymod, stream):
"""
Routine takes modulemd object and a stream name.
Finds the latest stream from that and returns that as a stream
@@ -39,6 +40,7 @@ def _get_latest_streams (mymod, stream):
return latest_streams
def _get_repoinfo(directory):
"""
A function which goes into the given directory and sets up the
@@ -55,6 +57,46 @@ def _get_repoinfo(directory):
r = h.perform()
return r.getinfo(librepo.LRR_YUM_REPO)
def _get_modulemd(directory=None, repo_info=None):
"""
Retrieve the module metadata from this repository.
:param directory: The path to the repository. Must contain repodata/repomd.xml and modules.yaml.
:param repo_info: An already-acquired repo_info structure
:return: A Modulemd.ModulemdIndex object containing the module metadata from this repository.
"""
# Return the cached value
global _idx
if _idx:
return _idx
# If we don't have a cached value, we need either directory or repo_info
assert directory or repo_info
if directory:
directory = os.path.abspath(directory)
repo_info = _get_repoinfo(directory)
if 'modules' not in repo_info:
return None
_idx = mmd.ModuleIndex.new()
with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf:
mmdcts = gzf.read().decode('utf-8')
res, failures = _idx.update_from_string(mmdcts, True)
if len(failures) != 0:
raise Exception("YAML FAILURE: FAILURES: %s" % failures)
if not res:
raise Exception("YAML FAILURE: res != True")
# Ensure that every stream in the index is using v2
_idx.upgrade_streams(mmd.ModuleStreamVersionEnum.TWO)
return _idx
def _get_hawkey_sack(repo_info):
"""
A function to pull in the repository sack from hawkey.
@@ -70,6 +112,7 @@ def _get_hawkey_sack(repo_info):
return primary_sack
def _get_filelist(package_sack):
"""
Determine the file locations of all packages in the sack. Use the
@@ -78,10 +121,12 @@ def _get_filelist(package_sack):
"""
pkg_list = {}
for pkg in hawkey.Query(package_sack):
nevr="%s-%s:%s-%s.%s"% (pkg.name,pkg.epoch,pkg.version,pkg.release,pkg.arch)
nevr = "%s-%s:%s-%s.%s" % (pkg.name, pkg.epoch,
pkg.version, pkg.release, pkg.arch)
pkg_list[nevr] = pkg.location
return pkg_list
def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
"""
Simple routine to go through a repo, and figure out which packages
@@ -98,28 +143,14 @@ def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
pkgs.add(pkg.location)
return pkgs
def _parse_repository_modular(repo_info,package_sack):
def _parse_repository_modular(repo_info, package_sack):
"""
Returns a dictionary of packages indexed by the modules they are
contained in.
"""
cts = {}
idx = mmd.ModuleIndex()
myfile = repo_info['modules']
if myfile.endswith(".gz"):
openfunc=gzip.GzipFile
elif myfile.endswith(".xz"):
openfunc=lzma.LZMAFile
else:
print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
sys.exit(1)
with openfunc(filename=myfile, mode='r') as gzf:
mmdcts = gzf.read().decode('utf-8')
res, failures = idx.update_from_string(mmdcts, True)
if len(failures) != 0:
raise Exception("YAML FAILURE: FAILURES: %s" % failures)
if not res:
raise Exception("YAML FAILURE: res != True")
idx = _get_modulemd(repo_info=repo_info)
pkgs_list = _get_filelist(package_sack)
idx.upgrade_streams(2)
@@ -151,6 +182,7 @@ def _get_modular_pkgset(mod):
return list(pkgs)
def _perform_action(src, dst, action):
"""
Performs either a copy, hardlink or symlink of the file src to the
@@ -169,6 +201,7 @@ def _perform_action(src, dst, action):
elif action == 'symlink':
os.symlink(src, dst)
def validate_filenames(directory, repoinfo):
"""
Take a directory and repository information. Test each file in
@@ -185,108 +218,156 @@ def validate_filenames(directory, repoinfo):
return isok
def get_default_modules(directory):
def _get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps):
if stream.get_NSVCA() in all_deps:
# We've already encountered this NSVCA, so don't go through it again
logging.debug('Already included {}'.format(stream.get_NSVCA()))
return
# Store this NSVCA/NS pair
local_deps = all_deps
local_deps.add(stream.get_NSVCA())
logging.debug("Recursive deps: {}".format(stream.get_NSVCA()))
# Loop through the dependencies for this stream
deps = stream.get_dependencies()
# At least one of the dependency array entries must exist in the repo
found_dep = False
for dep in deps:
# Within an array entry, all of the modules must be present in the
# index
found_all_modules = True
for modname in dep.get_runtime_modules():
# Ignore "platform" because it's special
if modname == "platform":
logging.debug('Skipping platform')
continue
logging.debug('Processing dependency on module {}'.format(modname))
mod = idx.get_module(modname)
if not mod:
# This module wasn't present in the index.
found_module = False
continue
# Within a module, at least one of the requested streams must be
# present
streamnames = dep.get_runtime_streams(modname)
found_stream = False
for streamname in streamnames:
stream_list = _get_latest_streams(mod, streamname)
for inner_stream in stream_list:
try:
_get_recursive_dependencies(
local_deps, idx, inner_stream, ignore_missing_deps)
except FileNotFoundError as e:
# Could not find all of this stream's dependencies in
# the repo
continue
found_stream = True
# None of the streams were found for this module
if not found_stream:
found_all_modules = False
# We've iterated through all of the modules; if it's still True, this
# dependency is consistent in the index
if found_all_modules:
found_dep = True
# We were unable to resolve the dependencies for any of the array entries.
# raise FileNotFoundError
if not found_dep and not ignore_missing_deps:
raise FileNotFoundError(
"Could not resolve dependencies for {}".format(
stream.get_NSVCA()))
all_deps.update(local_deps)
def get_default_modules(directory, ignore_missing_deps):
"""
Work through the list of modules and come up with a default set of
modules which would be the minimum to output.
Returns a set of modules
"""
directory = os.path.abspath(directory)
repo_info = _get_repoinfo(directory)
provides = set()
contents = set()
if 'modules' not in repo_info:
return contents
idx = mmd.ModuleIndex()
myfile=repo_info['modules']
if myfile.endswith(".gz"):
openfunc=gzip.GzipFile
elif myfile.endswith(".xz"):
openfunc=lzma.LZMAFile
else:
print("This file type is not fixed in this hack. Please fix code. (2021-05-20)");
sys.exit(1)
with openfunc(filename=myfile, mode='r') as gzf:
mmdcts = gzf.read().decode('utf-8')
res, failures = idx.update_from_string(mmdcts, True)
if len(failures) != 0:
raise Exception("YAML FAILURE: FAILURES: %s" % failures)
if not res:
raise Exception("YAML FAILURE: res != True")
all_deps = set()
idx.upgrade_streams(2)
idx = _get_modulemd(directory)
if not idx:
return all_deps
# OK this is cave-man no-sleep programming. I expect there is a
# better way to do this that would be a lot better. However after
# a long long day.. this is what I have.
# First we oo through the default streams and create a set of
# provides that we can check against later.
for modname in idx.get_default_streams():
for modname, streamname in idx.get_default_streams().items():
# Only the latest version of a stream is important, as that is the only one that DNF will consider in its
# transaction logic. We still need to handle each context individually.
mod = idx.get_module(modname)
# Get the default streams and loop through them.
stream_set = mod.get_streams_by_stream_name(
mod.get_defaults().get_default_stream())
stream_set = _get_latest_streams(mod, streamname)
for stream in stream_set:
tempstr = "%s:%s" % (stream.props.module_name,
stream.props.stream_name)
provides.add(tempstr)
# Different contexts have different dependencies
try:
logging.debug("Processing {}".format(stream.get_NSVCA()))
_get_recursive_dependencies(all_deps, idx, stream, ignore_missing_deps)
logging.debug("----------")
except FileNotFoundError as e:
# Not all dependencies could be satisfied
print(
"Not all dependencies for {} could be satisfied. {}. Skipping".format(
stream.get_NSVCA(), e))
continue
logging.debug('Default module streams: {}'.format(all_deps))
return all_deps
# Now go through our list and build up a content lists which will
# have only modules which have their dependencies met
tempdict = {}
for modname in idx.get_default_streams():
mod = idx.get_module(modname)
# Get the default streams and loop through them.
# This is a sorted list with the latest in it. We could drop
# looking at later ones here in a future version. (aka lines
# 237 to later)
stream_set = mod.get_streams_by_stream_name(
mod.get_defaults().get_default_stream())
for stream in stream_set:
ourname = stream.get_NSVCA()
tmp_name = "%s:%s" % (stream.props.module_name,
stream.props.stream_name)
# Get dependencies is a list of items. All of the modules
# seem to only have 1 item in them, but we should loop
# over the list anyway.
for deps in stream.get_dependencies():
isprovided = True # a variable to say this can be added.
for mod in deps.get_runtime_modules():
tempstr=""
# It does not seem easy to figure out what the
# platform is so just assume we will meet it.
if mod != 'platform':
for stm in deps.get_runtime_streams(mod):
tempstr = "%s:%s" %(mod,stm)
if tempstr not in provides:
# print( "%s : %s not found." % (ourname,tempstr))
isprovided = False
if isprovided:
if tmp_name in tempdict:
# print("We found %s" % tmp_name)
# Get the stream version we are looking at
ts1=ourname.split(":")[2]
# Get the stream version we stored away
ts2=tempdict[tmp_name].split(":")[2]
# See if we got a newer one. We probably
# don't as it is a sorted list but we
# could have multiple contexts which would
# change things.
if ( int(ts1) > int(ts2) ):
# print ("%s > %s newer for %s", ts1,ts2,ourname)
tempdict[tmp_name] = ourname
else:
# print("We did not find %s" % tmp_name)
tempdict[tmp_name] = ourname
# OK we finally got all our stream names we want to send back to
# our calling function. Read them out and add them to the set.
for indx in tempdict:
contents.add(tempdict[indx])
def _pad_svca(svca, target_length):
"""
If the split() doesn't return all values (e.g. arch is missing), pad it
with `None`
"""
length = len(svca)
svca.extend([None] * (target_length - length))
return svca
return contents
def _dump_modulemd(modname, yaml_file):
idx = _get_modulemd()
assert idx
# Create a new index to hold the information about this particular
# module and stream
new_idx = mmd.ModuleIndex.new()
# Add the module streams
module_name, *svca = modname.split(':')
stream_name, version, context, arch = _pad_svca(svca, 4)
logging.debug("Dumping YAML for {}, {}, {}, {}, {}".format(
module_name, stream_name, version, context, arch))
mod = idx.get_module(module_name)
streams = mod.search_streams(stream_name, int(version), context, arch)
# This should usually be a single item, but we'll be future-compatible
# and account for the possibility of having multiple streams here.
for stream in streams:
new_idx.add_module_stream(stream)
# Add the module defaults
defs = mod.get_defaults()
if defs:
new_idx.add_defaults(defs)
# Write out the file
try:
with open(yaml_file, 'w') as output:
output.write(new_idx.dump_to_string())
except PermissionError as e:
logging.error("Could not write YAML to file: {}".format(e))
raise
def perform_split(repos, args, def_modules):
@@ -304,19 +385,34 @@ def perform_split(repos, args, def_modules):
os.path.join(targetdir, pkgfile),
args.action)
# Extract the modular metadata for this module
if modname != 'non_modular':
_dump_modulemd(modname, os.path.join(targetdir, 'modules.yaml'))
def create_repos(target, repos,def_modules, only_defaults):
def create_repos(target, repos, def_modules, only_defaults):
"""
Routine to create repositories. Input is target directory and a
list of repositories.
Returns None
"""
for modname in repos:
if only_defaults and modname not in def_modules:
continue
targetdir = os.path.join(target, modname)
subprocess.run([
'createrepo_c', os.path.join(target, modname),
'createrepo_c', targetdir,
'--no-database'])
if modname != 'non_modular':
subprocess.run([
'modifyrepo_c',
'--mdtype=modules',
os.path.join(targetdir, 'modules.yaml'),
os.path.join(targetdir, 'repodata')
])
def parse_args():
@@ -326,6 +422,8 @@ def parse_args():
"""
parser = argparse.ArgumentParser(description='Split repositories up')
parser.add_argument('repository', help='The repository to split')
parser.add_argument('--debug', help='Enable debug logging',
action='store_true', default=False)
parser.add_argument('--action', help='Method to create split repos files',
choices=('hardlink', 'symlink', 'copy'),
default='hardlink')
@@ -336,6 +434,11 @@ def parse_args():
action='store_true', default=False)
parser.add_argument('--only-defaults', help='Only output default modules',
action='store_true', default=False)
parser.add_argument('--ignore-missing-default-deps',
help='When using --only-defaults, do not skip '
'default streams whose dependencies cannot be '
'resolved within this repository',
action='store_true', default=False)
return parser.parse_args()
@@ -354,6 +457,7 @@ def setup_target(args):
else:
os.mkdir(args.target)
def parse_repository(directory):
"""
Parse a specific directory, returning a dict with keys module NSVC's and
@@ -372,34 +476,39 @@ def parse_repository(directory):
# everything in a known sack (aka non_modular).
if 'modules' in repo_info:
mod = _parse_repository_modular(repo_info,package_sack)
mod = _parse_repository_modular(repo_info, package_sack)
modpkgset = _get_modular_pkgset(mod)
else:
mod = dict()
modpkgset = set()
non_modular = _parse_repository_non_modular(package_sack,repo_info,
non_modular = _parse_repository_non_modular(package_sack, repo_info,
modpkgset)
mod['non_modular'] = non_modular
## We should probably go through our default modules here and
## remove them from our mod. This would cut down some code paths.
# We should probably go through our default modules here and
# remove them from our mod. This would cut down some code paths.
return mod
def main():
# Determine what the arguments are and
args = parse_args()
if args.debug:
logging.basicConfig(level=logging.DEBUG)
# Go through arguments and act on their values.
setup_target(args)
repos = parse_repository(args.repository)
if args.only_defaults:
def_modules = get_default_modules(args.repository)
def_modules = get_default_modules(args.repository, args.ignore_missing_default_deps)
else:
def_modules = set()
def_modules.add('non_modular')
if not args.skip_missing:
@@ -408,7 +517,8 @@ def main():
if args.target:
perform_split(repos, args, def_modules)
if args.create_repos:
create_repos(args.target, repos,def_modules,args.only_defaults)
create_repos(args.target, repos, def_modules, args.only_defaults)
if __name__ == '__main__':
main()
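
The new _get_modulemd() caches the parsed module index at module level so the repository's modules YAML is only read once. Reduced to a standalone sketch, assuming the libmodulemd 2.x GObject bindings are installed; the modules.yaml.gz path is only a placeholder:

    import gzip
    import gi
    gi.require_version('Modulemd', '2.0')
    from gi.repository import Modulemd as mmd

    _idx = None  # module-level cache, parsed at most once

    def load_module_index(modules_yaml_gz):
        """Parse the repository module metadata once and reuse it afterwards."""
        global _idx
        if _idx:
            return _idx
        with gzip.GzipFile(filename=modules_yaml_gz, mode='r') as gzf:
            yaml_text = gzf.read().decode('utf-8')
        idx = mmd.ModuleIndex.new()
        ok, failures = idx.update_from_string(yaml_text, True)
        if failures or not ok:
            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
        # normalise every stream to modulemd v2, as splitter.py now does
        idx.upgrade_streams(mmd.ModuleStreamVersionEnum.TWO)
        _idx = idx
        return _idx

    if __name__ == '__main__':
        idx = load_module_index('repodata/modules.yaml.gz')  # placeholder path
        print(idx.get_default_streams())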