[grobisplitter] first attempt at adding configs for system

This commit is contained in:
Stephen Smoogen 2019-06-01 17:32:14 +00:00
parent d8f5db1213
commit 6f69b370f4
6 changed files with 463 additions and 26 deletions

View file

@ -22,6 +22,9 @@
- fas_client
- collectd/base
- sudo
- { role: nfs/client, mnt_dir: '/srv/web/pub', nfs_src_dir: 'fedora_ftp/fedora.redhat.com/pub' }
- { role: nfs/client, mnt_dir: '/mnt/fedora/app', nfs_src_dir: 'fedora_app/app' }
- grobisplitter
tasks:
- import_tasks: "{{ tasks_path }}/2fa_client.yml"
@ -29,27 +32,3 @@
handlers:
- import_tasks: "{{ handlers_path }}/restart_services.yml"
# Play: install the package set grobisplitter needs to run.
- name: set up packages
  hosts: grobisplitter
  user: root
  gather_facts: true
  vars_files:
    - /srv/web/infra/ansible/vars/global.yml
    - "/srv/private/ansible/vars.yml"
    - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml
  handlers:
    - import_tasks: "{{ handlers_path }}/restart_services.yml"
  tasks:
    # Pass the list directly to the package module instead of the
    # deprecated with_items loop; block-style args instead of key=value.
    - name: install needed packages
      package:
        name:
          - rsync
          - net-tools
          - libmodulemd
          - librepo
          - python3-librepo
          - python3-repomd
          - createrepo_c
        state: present

View file

@ -0,0 +1,12 @@
The Current Master Git Repository for the grobisplitter program is
https://github.com/smooge/GrobiSplitter.git to be moved under a
Community Infrastructure repository later. The program requires
Python 3 and the following packages:
gobject-introspection
libmodulemd-2.5.0
libmodulemd1-1.8.11
librepo
python3-gobject-base
python3-hawkey
python3-librepo

View file

@ -0,0 +1,62 @@
#!/bin/bash
# Split the daily RHEL8 repository mirror into per-module repos, one
# dated tree per architecture, for koji to build against.
HOMEDIR=/mnt/fedora/app/fi-repo/rhel/rhel8/
BINDIR=/usr/local/bin
ARCHES="aarch64 ppc64le s390x x86_64"
# ISO date without the hour/timezone suffix, e.g. 2019-06-01
DATE=$(date -Ih | sed 's/+.*//')

# NOTE(review): output is written under ${HOMEDIR}/koji/${DATE} but this
# guard checks ${HOMEDIR}/${DATE} — confirm which path is intended.
if [ -d "${HOMEDIR}/${DATE}" ]; then
    echo "Directory already exists. Please remove or fix"
    exit 1
fi

# BUGFIX: was "for ARCH in ARCHES", which looped exactly once over the
# literal string "ARCHES" instead of the architecture list.
for ARCH in ${ARCHES}; do
    # The archdir is where we daily download updates for rhel8
    # BUGFIX: was ${ARCHES} (the whole list), not the current ${ARCH}.
    ARCHDIR=${HOMEDIR}/${ARCH}
    if [ ! -d "${ARCHDIR}" ]; then
        echo "Unable to find ${ARCHDIR}"
        exit 1
    fi
    # We consolidate all of the default repositories and remerge them
    # in a daily tree. This allows us to point koji at a particular
    # day if we have specific build concerns.
    # BUGFIX: was ${ARCHES}; every arch would have shared one mangled dir.
    OUTDIR=${HOMEDIR}/koji/${DATE}/${ARCH}
    mkdir -vp "${OUTDIR}"
    cd "${OUTDIR}" || { echo "Unable to enter ${OUTDIR}"; exit 1; }
    # Begin splitting the various packages into their subtrees
    ${BINDIR}/splitter.py --action hardlink --target RHEL-8-001 --create-repos ${ARCHDIR}/rhel-8-for-${ARCH}-baseos-rpms/ --only-defaults
    ${BINDIR}/splitter.py --action hardlink --target RHEL-8-002 --create-repos ${ARCHDIR}/rhel-8-for-${ARCH}-appstream-rpms/ --only-defaults
    ${BINDIR}/splitter.py --action hardlink --target RHEL-8-003 --create-repos ${ARCHDIR}/codeready-builder-for-rhel-8-${ARCH}-rpms/
    # Copy the various module trees into RHEL-8-001 where we want them
    # to work.
    cp -avlr RHEL-8-002/* RHEL-8-001
    cp -avlr RHEL-8-003/* RHEL-8-001
    # Go into the main tree
    pushd RHEL-8-001
    # Go into its non_modular subtree and update its repo as its data
    # is based off of the first split
    pushd non_modular
    createrepo -v .
    popd
    # Build out the repos we have and merge them together with
    # mergerepo -k
    repos=""
    for i in $( ls -1 ); do
        repos+="-r $i "
    done
    mergerepo_c -v -k ${repos}
    popd
    # Cleanup the trash
    rm -rf RHEL-8-002 RHEL-8-003
    # loop to the next
done

View file

@ -0,0 +1,357 @@
#!/bin/python3
# Import libraries needed for application to work
import argparse
import shutil
import gi
import gzip
import librepo
import hawkey
import tempfile
import os
import subprocess
import sys
# Look for a specific version of modulemd. The 1.x series does not
# have the tools we need.
# Look for a specific version of modulemd. The 1.x series does not
# have the tools we need.
try:
    gi.require_version('Modulemd', '2.0')
    from gi.repository import Modulemd
except (ValueError, ImportError):
    # gi.require_version raises ValueError when only a 1.x typelib is
    # installed; the import itself raises ImportError.  Exit non-zero
    # so cron/shell callers see the failure (was sys.exit(0)).
    print("We require newer versions of modulemd than installed..")
    sys.exit(1)

# Short alias used throughout the module.
mmd = Modulemd
def _get_repoinfo(directory):
    """Load yum repository metadata for a local directory via librepo.

    Uses a throw-away destination directory for librepo's working
    files and returns the LRR_YUM_REPO info dictionary.
    """
    with tempfile.TemporaryDirectory(prefix='elsplit_librepo_') as workdir:
        handle = librepo.Handle()
        handle.setopt(librepo.LRO_URLS, ["file://%s" % directory])
        handle.setopt(librepo.LRO_REPOTYPE, librepo.LR_YUMREPO)
        handle.setopt(librepo.LRO_DESTDIR, workdir)
        handle.setopt(librepo.LRO_LOCAL, True)
        handle.setopt(librepo.LRO_IGNOREMISSING, False)
        result = handle.perform()
        return result.getinfo(librepo.LRR_YUM_REPO)
def _get_hawkey_sack(repo_info):
    """Build and return a hawkey Sack from librepo repository info.

    repo_info is the LRR_YUM_REPO dict from _get_repoinfo; its
    'filelists', 'primary' and 'repomd' entries point at the metadata
    files hawkey loads.
    """
    repo = hawkey.Repo("")
    repo.filelists_fn = repo_info["filelists"]
    repo.primary_fn = repo_info["primary"]
    repo.repomd_fn = repo_info["repomd"]
    sack = hawkey.Sack()
    sack.load_repo(repo, build_cache=False)
    return sack
def _get_filelist(package_sack):
    """Map every package in the sack to its repository file location.

    Keys are "name-epoch:version-release.arch" strings, values the
    package's location attribute.  Returns a dictionary.
    """
    return {
        "%s-%s:%s-%s.%s" % (pkg.name, pkg.epoch, pkg.version,
                            pkg.release, pkg.arch): pkg.location
        for pkg in hawkey.Query(package_sack)
    }
def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
    """Collect file locations of packages owned by no module.

    Walks every package in package_sack and keeps the locations that
    do not appear in modpkgset.  repo_info is accepted for interface
    symmetry but is not consulted.  Returns a set of file locations.
    """
    return {
        pkg.location
        for pkg in hawkey.Query(package_sack)
        if pkg.location not in modpkgset
    }
def _parse_repository_modular(repo_info, package_sack):
    """Index repository packages by the module stream containing them.

    Reads the gzipped modulemd document referenced by
    repo_info['modules'] and returns a dict keyed by stream NSVCA,
    each value being the list of file locations of that stream's
    artifacts that actually exist in the repository.
    """
    index = mmd.ModuleIndex()
    with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf:
        yaml_text = gzf.read().decode('utf-8')
    ok, failures = index.update_from_string(yaml_text, True)
    if len(failures) != 0:
        raise Exception("YAML FAILURE: FAILURES: %s" % failures)
    if not ok:
        raise Exception("YAML FAILURE: res != True")
    locations = _get_filelist(package_sack)
    index.upgrade_streams(2)
    contents = {}
    for name in index.get_module_names():
        module = index.get_module(name)
        for stream in module.get_all_streams():
            # Keep only artifacts that are present in the repo's filelist.
            contents[stream.get_NSVCA()] = [
                locations[pkg]
                for pkg in stream.get_rpm_artifacts()
                if pkg in locations
            ]
    return contents
def _get_modular_pkgset(mod):
"""
Takes a module and goes through the moduleset to determine which
packages are inside it.
Returns a list of packages
"""
pkgs = set()
for modcts in mod.values():
for pkg in modcts:
pkgs.add(pkg)
return list(pkgs)
def _perform_action(src, dst, action):
"""
Performs either a copy, hardlink or symlink of the file src to the
file destination.
Returns None
"""
if action == 'copy':
try:
shutil.copy(src, dst)
except FileNotFoundError:
# Missing files are acceptable: they're already checked before
# this by validate_filenames.
pass
elif action == 'hardlink':
os.link(src, dst)
elif action == 'symlink':
os.symlink(src, dst)
def validate_filenames(directory, repoinfo):
    """Verify every package listed in repoinfo exists under directory.

    repoinfo maps module names to lists of package paths relative to
    directory.  Each missing file is reported on stdout.  Returns
    True when every file exists, False otherwise.
    """
    all_present = True
    for module, packages in repoinfo.items():
        for pkg in packages:
            if not os.path.exists(os.path.join(directory, pkg)):
                all_present = False
                print("Path %s from mod %s did not exist" % (pkg, module))
    return all_present
def get_default_modules(directory):
    """Compute the set of default module streams for a repository.

    Reads the modulemd metadata for ``directory`` and returns the set
    of NSVCA names of every default stream whose (non-platform)
    runtime module dependencies can be satisfied by another default
    stream in the same repository.  Returns an empty set when the
    repository has no module metadata.
    """
    directory = os.path.abspath(directory)
    repo_info = _get_repoinfo(directory)
    provides = set()
    contents = set()
    if 'modules' not in repo_info:
        return contents
    idx = mmd.ModuleIndex()
    with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf:
        mmdcts = gzf.read().decode('utf-8')
        res, failures = idx.update_from_string(mmdcts, True)
        if len(failures) != 0:
            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
        if not res:
            raise Exception("YAML FAILURE: res != True")
    idx.upgrade_streams(2)
    # First pass: go through the default streams and build a
    # "name:stream" provides set that dependency checks consult later.
    for modname in idx.get_default_streams():
        mod = idx.get_module(modname)
        # Get the default streams and loop through them.
        stream_set = mod.get_streams_by_stream_name(
            mod.get_defaults().get_default_stream())
        for stream in stream_set:
            templist = stream.get_NSVCA().split(":")
            provides.add("%s:%s" % (templist[0], templist[1]))
    # Second pass: keep only the default streams whose runtime module
    # dependencies are all present in the provides set.
    for modname in idx.get_default_streams():
        mod = idx.get_module(modname)
        # Get the default streams and loop through them.
        stream_set = mod.get_streams_by_stream_name(
            mod.get_defaults().get_default_stream())
        for stream in stream_set:
            isprovided = True  # a variable to say this can be added.
            ourname = stream.get_NSVCA()
            # get_dependencies() is a list; observed data has a single
            # item but loop over the whole list anyway.
            for deps in stream.get_dependencies():
                # BUGFIX: loop variable renamed from 'mod' — it shadowed
                # the module object fetched above.
                for depmod in deps.get_runtime_modules():
                    # It does not seem easy to figure out what the
                    # platform is so just assume we will meet it.
                    if depmod != 'platform':
                        for stm in deps.get_runtime_streams(depmod):
                            tempstr = "%s:%s" % (depmod, stm)
                            if tempstr not in provides:
                                print("%s : %s not found." % (ourname, tempstr))
                                isprovided = False
            if isprovided:
                contents.add(ourname)
    return contents
def perform_split(repos, args, def_modules):
    """Create one directory per module under args.target and fill it.

    repos maps module names to package paths relative to
    args.repository.  When args.only_defaults is set, modules outside
    def_modules are skipped.  Each package is placed with the
    configured args.action via _perform_action.
    """
    for modname, packages in repos.items():
        if args.only_defaults and modname not in def_modules:
            continue
        moddir = os.path.join(args.target, modname)
        os.mkdir(moddir)
        for pkg in packages:
            filename = os.path.split(pkg)[1]
            _perform_action(
                os.path.join(args.repository, pkg),
                os.path.join(moddir, filename),
                args.action)
def create_repos(target, repos, def_modules, only_defaults):
    """Run createrepo_c over every emitted module directory.

    target is the split output directory; repos the module→packages
    mapping.  Modules outside def_modules are skipped when
    only_defaults is set.  Returns None.
    """
    selected = (name for name in repos
                if not only_defaults or name in def_modules)
    for modname in selected:
        subprocess.run(
            ['createrepo_c', os.path.join(target, modname), '--no-database'])
def parse_args():
    """Parse command-line options for the repository splitter.

    Returns the argparse namespace carrying repository, action,
    target, skip_missing, create_repos and only_defaults.
    """
    parser = argparse.ArgumentParser(description='Split repositories up')
    parser.add_argument('repository', help='The repository to split')
    parser.add_argument(
        '--action', help='Method to create split repos files',
        choices=('hardlink', 'symlink', 'copy'), default='hardlink')
    parser.add_argument('--target', help='Target directory for split repos')
    # The three boolean switches share the same shape; declare them in bulk.
    for flag, text in (('--skip-missing', 'Skip missing packages'),
                       ('--create-repos', 'Create repository metadatas'),
                       ('--only-defaults', 'Only output default modules')):
        parser.add_argument(flag, help=text,
                            action='store_true', default=False)
    return parser.parse_args()
def setup_target(args):
    """Ensure args.target exists as an empty directory.

    Does nothing when args.target is unset.  Normalizes the path to
    absolute, creates the directory if absent, and raises ValueError
    when it exists but is not a directory or is not empty.  Returns
    nothing.
    """
    if not args.target:
        return
    args.target = os.path.abspath(args.target)
    if not os.path.exists(args.target):
        os.mkdir(args.target)
        return
    if not os.path.isdir(args.target):
        raise ValueError("Target must be a directory")
    if os.listdir(args.target):
        raise ValueError("Target must be empty")
def parse_repository(directory):
    """
    Parse a specific directory, returning a dict with keys module NSVC's and
    values a list of package NVRs.

    The dict will also have a key "non_modular" for the non-modular packages.
    """
    directory = os.path.abspath(directory)
    repo_info = _get_repoinfo(directory)
    # Get the package sack describing every package in the repository.
    # (A dead `_get_filelist(package_sack)` call whose result was
    # discarded has been removed; _parse_repository_modular computes
    # its own filelist.)
    package_sack = _get_hawkey_sack(repo_info)
    # If we have a repository with no modules we do not want our
    # script to error out but just remake the repository with
    # everything in a known sack (aka non_modular).
    if 'modules' in repo_info:
        mod = _parse_repository_modular(repo_info, package_sack)
        modpkgset = _get_modular_pkgset(mod)
    else:
        mod = dict()
        modpkgset = set()
    mod['non_modular'] = _parse_repository_non_modular(
        package_sack, repo_info, modpkgset)
    ## We should probably go through our default modules here and
    ## remove them from our mod. This would cut down some code paths.
    return mod
def main():
    """Entry point: split a repository according to CLI options."""
    args = parse_args()
    setup_target(args)
    repos = parse_repository(args.repository)
    # Default modules only matter with --only-defaults; the synthetic
    # non_modular repo is always emitted.
    def_modules = (get_default_modules(args.repository)
                   if args.only_defaults else set())
    def_modules.add('non_modular')
    if not args.skip_missing and not validate_filenames(args.repository, repos):
        raise ValueError("Package files were missing!")
    if args.target:
        perform_split(repos, args, def_modules)
    if args.create_repos:
        create_repos(args.target, repos, def_modules, args.only_defaults)


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,27 @@
---
# Tasks to install and deploy the grobisplitter tooling.
- name: install python packages
  package:
    name:
      - createrepo_c
      - libmodulemd
      - librepo
      - python3-hawkey
      - python3-librepo
      - python3-repomd
    state: present
  tags:
    - grobi

- name: make sure that /usr/local/bin exists
  file:
    path: /usr/local/bin
    state: directory
  tags:
    - grobi

- name: copy local/bin files
  copy:
    src: "{{ item }}"
    dest: /usr/local/bin/
    mode: "0755"
  loop:
    - splitter.py
    - rhel8-split.sh
  tags:
    - grobi

## Cron job goes here.

View file

@ -2,8 +2,8 @@
- name: install python-pandas package
package: state=present name=python-pandas
tags:
- packages
- web-data
- packages
- web-data
- name: make sure the /usr/local/share/web-data-analysis exists
file: path=/usr/local/share/web-data-analysis state=directory