diff --git a/playbooks/hosts/grobisplitter01.phx2.fedoraproject.org.yml b/playbooks/hosts/grobisplitter01.phx2.fedoraproject.org.yml index 80622bba86..3bd0068b10 100644 --- a/playbooks/hosts/grobisplitter01.phx2.fedoraproject.org.yml +++ b/playbooks/hosts/grobisplitter01.phx2.fedoraproject.org.yml @@ -22,6 +22,9 @@ - fas_client - collectd/base - sudo + - { role: nfs/client, mnt_dir: '/srv/web/pub', nfs_src_dir: 'fedora_ftp/fedora.redhat.com/pub' } + - { role: nfs/client, mnt_dir: '/mnt/fedora/app', nfs_src_dir: 'fedora_app/app' } + - grobisplitter tasks: - import_tasks: "{{ tasks_path }}/2fa_client.yml" @@ -29,27 +32,3 @@ handlers: - import_tasks: "{{ handlers_path }}/restart_services.yml" - -- name: set up packages - hosts: grobisplitter - user: root - gather_facts: True - vars_files: - - /srv/web/infra/ansible/vars/global.yml - - "/srv/private/ansible/vars.yml" - - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml - handlers: - - import_tasks: "{{ handlers_path }}/restart_services.yml" - - - tasks: - - name: install needed packages - package: name={{ item }} state=present - with_items: - - rsync - - net-tools - - libmodulemd - - librepo - - python3-librepo - - python3-repomd - - createrepo_c diff --git a/roles/grobisplitter/README.txt b/roles/grobisplitter/README.txt new file mode 100644 index 0000000000..f72d4f9119 --- /dev/null +++ b/roles/grobisplitter/README.txt @@ -0,0 +1,12 @@ +The Current Master Git Repository for the grobisplitter program is +https://github.com/smooge/GrobiSplitter.git to be moved under a +Community Infrastructure repository later. The program depends upon +python3 and other programs. 
#!/bin/bash
# rhel8-split.sh - split the daily RHEL8 mirror for each architecture
# into per-module repositories (via splitter.py) and merge them into a
# dated tree under ${HOMEDIR}/koji/ that koji can be pointed at.

HOMEDIR=/mnt/fedora/app/fi-repo/rhel/rhel8/
BINDIR=/usr/local/bin

ARCHES="aarch64 ppc64le s390x x86_64"
# ISO date with the timezone offset stripped, e.g. 2019-04-01T12
DATE=$(date -Ih | sed 's/+.*//')

if [ -d "${HOMEDIR}/${DATE}" ]; then
    echo "Directory already exists. Please remove or fix"
    exit 1
fi

# BUG FIX: iterate over the *contents* of ARCHES; the original
# `for ARCH in ARCHES` looped exactly once over the literal word.
for ARCH in ${ARCHES}; do
    # The archdir is where we daily download updates for rhel8.
    # BUG FIX: use the current ${ARCH}, not the whole ${ARCHES} list.
    ARCHDIR=${HOMEDIR}/${ARCH}
    if [ ! -d "${ARCHDIR}" ]; then
        echo "Unable to find ${ARCHDIR}"
        exit 1
    fi

    # We consolidate all of the default repositories and remerge them
    # in a daily tree. This allows us to point koji at a particular
    # day if we have specific build concerns.
    OUTDIR=${HOMEDIR}/koji/${DATE}/${ARCH}
    mkdir -vp "${OUTDIR}"
    # BUG FIX: verify the directory we just created (OUTDIR); the
    # original re-tested ARCHDIR, which was already checked above.
    if [ ! -d "${OUTDIR}" ]; then
        echo "Unable to find ${OUTDIR}"
        exit 1
    else
        cd "${OUTDIR}"
    fi

    # Begin splitting the various packages into their subtrees
    ${BINDIR}/splitter.py --action hardlink --target RHEL-8-001 --create-repos ${ARCHDIR}/rhel-8-for-${ARCH}-baseos-rpms/ --only-defaults
    ${BINDIR}/splitter.py --action hardlink --target RHEL-8-002 --create-repos ${ARCHDIR}/rhel-8-for-${ARCH}-appstream-rpms/ --only-defaults
    ${BINDIR}/splitter.py --action hardlink --target RHEL-8-003 --create-repos ${ARCHDIR}/codeready-builder-for-rhel-8-${ARCH}-rpms/

    # Copy the various module trees into RHEL-8-001 where we want them
    # to work.
    cp -avlr RHEL-8-002/* RHEL-8-001
    cp -avlr RHEL-8-003/* RHEL-8-001
    # Go into the main tree
    pushd RHEL-8-001
    # Go into its non_modular subtree and update its repo as its data
    # is based off of the first split
    pushd non_modular
    createrepo -v .
    popd

    # Build out the repos we have and merge them together with
    # mergerepo -k
    repos=""
    for i in $( ls -1 ); do
        repos+="-r $i "
    done
    mergerepo_c -v -k ${repos}
    popd

    # Cleanup the trash
    rm -rf RHEL-8-002 RHEL-8-003
# loop to the next arch
done
#!/usr/bin/python3
"""splitter.py - split a (possibly modular) yum repository into
per-module sub-repositories plus a synthetic 'non_modular' repository.

Driven by rhel8-split.sh to carve the RHEL8 compose mirrors up so
that koji can consume them.
"""

# Import libraries needed for application to work
import argparse
import gzip
import os
import shutil
import subprocess
import sys
import tempfile

import gi
import hawkey
import librepo

# Look for a specific version of modulemd. The 1.x series does not
# have the tools we need.
try:
    gi.require_version('Modulemd', '2.0')
    from gi.repository import Modulemd
except (ValueError, ImportError):
    # BUG FIX: was a bare `except:` (which also swallows SystemExit /
    # KeyboardInterrupt) and exited with status 0, making a missing
    # dependency look like success to calling scripts.
    print("We require newer versions of modulemd than installed.")
    sys.exit(1)

mmd = Modulemd


def _get_repoinfo(directory):
    """
    Load the yum repository metadata found in ``directory`` via librepo.

    :param directory: path of a local yum repository.
    :returns: the librepo ``LRR_YUM_REPO`` result dict; keys include
        'primary', 'filelists', 'repomd' and, for modular repositories,
        'modules'.
    """
    # librepo needs a scratch destination directory even for local,
    # metadata-only loads; use a throwaway one.
    with tempfile.TemporaryDirectory(prefix='elsplit_librepo_') as lrodir:
        h = librepo.Handle()
        h.setopt(librepo.LRO_URLS, ["file://%s" % directory])
        h.setopt(librepo.LRO_REPOTYPE, librepo.LR_YUMREPO)
        h.setopt(librepo.LRO_DESTDIR, lrodir)
        h.setopt(librepo.LRO_LOCAL, True)
        h.setopt(librepo.LRO_IGNOREMISSING, False)
        r = h.perform()
        return r.getinfo(librepo.LRR_YUM_REPO)
+ """ + hk_repo = hawkey.Repo("") + hk_repo.filelists_fn = repo_info["filelists"] + hk_repo.primary_fn = repo_info["primary"] + hk_repo.repomd_fn = repo_info["repomd"] + + primary_sack = hawkey.Sack() + primary_sack.load_repo(hk_repo, build_cache=False) + + return primary_sack + +def _get_filelist(package_sack): + """ + Determine the file locations of all packages in the sack. Use the + package-name-epoch-version-release-arch as the key. + Returns a dictionary. + """ + pkg_list = {} + for pkg in hawkey.Query(package_sack): + nevr="%s-%s:%s-%s.%s"% (pkg.name,pkg.epoch,pkg.version,pkg.release,pkg.arch) + pkg_list[nevr] = pkg.location + return pkg_list + +def _parse_repository_non_modular(package_sack, repo_info, modpkgset): + """ + Simple routine to go through a repo, and figure out which packages + are not in any module. Add the file locations for those packages + so we can link to them. + Returns a set of file locations. + """ + sack = package_sack + pkgs = set() + + for pkg in hawkey.Query(sack): + if pkg.location in modpkgset: + continue + pkgs.add(pkg.location) + return pkgs + +def _parse_repository_modular(repo_info,package_sack): + """ + Returns a dictionary of packages indexed by the modules they are + contained in. 
+ """ + cts = {} + idx = mmd.ModuleIndex() + with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf: + mmdcts = gzf.read().decode('utf-8') + res, failures = idx.update_from_string(mmdcts, True) + if len(failures) != 0: + raise Exception("YAML FAILURE: FAILURES: %s" % failures) + if not res: + raise Exception("YAML FAILURE: res != True") + + pkgs_list = _get_filelist(package_sack) + idx.upgrade_streams(2) + for modname in idx.get_module_names(): + mod = idx.get_module(modname) + for stream in mod.get_all_streams(): + templ = list() + for pkg in stream.get_rpm_artifacts(): + if pkg in pkgs_list: + templ.append(pkgs_list[pkg]) + else: + continue + cts[stream.get_NSVCA()] = templ + + return cts + + +def _get_modular_pkgset(mod): + """ + Takes a module and goes through the moduleset to determine which + packages are inside it. + Returns a list of packages + """ + pkgs = set() + + for modcts in mod.values(): + for pkg in modcts: + pkgs.add(pkg) + + return list(pkgs) + +def _perform_action(src, dst, action): + """ + Performs either a copy, hardlink or symlink of the file src to the + file destination. + Returns None + """ + if action == 'copy': + try: + shutil.copy(src, dst) + except FileNotFoundError: + # Missing files are acceptable: they're already checked before + # this by validate_filenames. + pass + elif action == 'hardlink': + os.link(src, dst) + elif action == 'symlink': + os.symlink(src, dst) + +def validate_filenames(directory, repoinfo): + """ + Take a directory and repository information. Test each file in + repository to exist in said module. This stops us when dealing + with broken repositories or missing modules. + Returns True if no problems found. False otherwise. 
+ """ + isok = True + for modname in repoinfo: + for pkg in repoinfo[modname]: + if not os.path.exists(os.path.join(directory, pkg)): + isok = False + print("Path %s from mod %s did not exist" % (pkg, modname)) + return isok + + +def get_default_modules(directory): + """ + Work through the list of modules and come up with a default set of + modules which would be the minimum to output. + Returns a set of modules + """ + directory = os.path.abspath(directory) + repo_info = _get_repoinfo(directory) + + provides = set() + contents = set() + if 'modules' not in repo_info: + return contents + idx = mmd.ModuleIndex() + with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf: + mmdcts = gzf.read().decode('utf-8') + res, failures = idx.update_from_string(mmdcts, True) + if len(failures) != 0: + raise Exception("YAML FAILURE: FAILURES: %s" % failures) + if not res: + raise Exception("YAML FAILURE: res != True") + + idx.upgrade_streams(2) + + # OK this is cave-man no-sleep programming. I expect there is a + # better way to do this that would be a lot better. However after + # a long long day.. this is what I have. + + # First we oo through the default streams and create a set of + # provides that we can check against later. + for modname in idx.get_default_streams(): + mod = idx.get_module(modname) + # Get the default streams and loop through them. + stream_set = mod.get_streams_by_stream_name( + mod.get_defaults().get_default_stream()) + for stream in stream_set: + templist = stream.get_NSVCA().split(":") + tempstr = "%s:%s" % (templist[0],templist[1]) + provides.add(tempstr) + + + # Now go through our list and build up a content lists which will + # have only modules which both + for modname in idx.get_default_streams(): + mod = idx.get_module(modname) + # Get the default streams and loop through them. 
+ stream_set = mod.get_streams_by_stream_name( + mod.get_defaults().get_default_stream()) + for stream in stream_set: + isprovided = True # a variable to say this can be added. + ourname = stream.get_NSVCA() + + # Get dependencies is a list of items. All of the modules + # seem to only have 1 item in them, but we should loop + # over the list anyway. + for deps in stream.get_dependencies(): + for mod in deps.get_runtime_modules(): + # It does not seem easy to figure out what the + # platform is so just assume we will meet it. + if mod != 'platform': + for stm in deps.get_runtime_streams(mod): + tempstr = "%s:%s" %(mod,stm) + if tempstr not in provides: + print( "%s : %s not found." % (ourname,tempstr)) + isprovided = False + if isprovided: + contents.add(ourname) + return contents + + +def perform_split(repos, args, def_modules): + for modname in repos: + if args.only_defaults and modname not in def_modules: + continue + + targetdir = os.path.join(args.target, modname) + os.mkdir(targetdir) + + for pkg in repos[modname]: + _, pkgfile = os.path.split(pkg) + _perform_action( + os.path.join(args.repository, pkg), + os.path.join(targetdir, pkgfile), + args.action) + + +def create_repos(target, repos,def_modules, only_defaults): + """ + Routine to create repositories. Input is target directory and a + list of repositories. + Returns None + """ + for modname in repos: + if only_defaults and modname not in def_modules: + continue + subprocess.run([ + 'createrepo_c', os.path.join(target, modname), + '--no-database']) + + +def parse_args(): + """ + A standard argument parser routine which pulls in values from the + command line and returns a parsed argument dictionary. 
+ """ + parser = argparse.ArgumentParser(description='Split repositories up') + parser.add_argument('repository', help='The repository to split') + parser.add_argument('--action', help='Method to create split repos files', + choices=('hardlink', 'symlink', 'copy'), + default='hardlink') + parser.add_argument('--target', help='Target directory for split repos') + parser.add_argument('--skip-missing', help='Skip missing packages', + action='store_true', default=False) + parser.add_argument('--create-repos', help='Create repository metadatas', + action='store_true', default=False) + parser.add_argument('--only-defaults', help='Only output default modules', + action='store_true', default=False) + return parser.parse_args() + + +def setup_target(args): + """ + Checks that the target directory exists and is empty. If not it + exits the program. Returns nothing. + """ + if args.target: + args.target = os.path.abspath(args.target) + if os.path.exists(args.target): + if not os.path.isdir(args.target): + raise ValueError("Target must be a directory") + elif len(os.listdir(args.target)) != 0: + raise ValueError("Target must be empty") + else: + os.mkdir(args.target) + +def parse_repository(directory): + """ + Parse a specific directory, returning a dict with keys module NSVC's and + values a list of package NVRs. + The dict will also have a key "non_modular" for the non-modular packages. + """ + directory = os.path.abspath(directory) + repo_info = _get_repoinfo(directory) + + # Get the package sack and get a filelist of all packages. + package_sack = _get_hawkey_sack(repo_info) + _get_filelist(package_sack) + + # If we have a repository with no modules we do not want our + # script to error out but just remake the repository with + # everything in a known sack (aka non_modular). 
def main():
    """
    Entry point: parse arguments, validate the target directory, split
    the repository, and optionally run createrepo on the results.
    """
    args = parse_args()

    # Validate/prepare the target directory before doing any work.
    setup_target(args)

    # BUG FIX: fail early with a clear message; previously this crashed
    # later in os.path.join(None, ...) with an obscure TypeError when
    # --create-repos was given without --target.
    if args.create_repos and not args.target:
        raise ValueError("--create-repos requires --target")

    repos = parse_repository(args.repository)

    if args.only_defaults:
        def_modules = get_default_modules(args.repository)
    else:
        def_modules = set()
    # BUG FIX: always keep the synthetic non-modular bucket; with
    # --only-defaults it was previously dropped, yet rhel8-split.sh
    # expects a non_modular/ subtree in the split output.
    def_modules.add('non_modular')

    if not args.skip_missing:
        if not validate_filenames(args.repository, repos):
            raise ValueError("Package files were missing!")
    if args.target:
        perform_split(repos, args, def_modules)
    if args.create_repos:
        create_repos(args.target, repos, def_modules, args.only_defaults)


if __name__ == '__main__':
    main()
diff --git a/roles/web-data-analysis/tasks/main.yml b/roles/web-data-analysis/tasks/main.yml index bbcc8bf533..c389338891 100644 --- a/roles/web-data-analysis/tasks/main.yml +++ b/roles/web-data-analysis/tasks/main.yml @@ -2,8 +2,8 @@ - name: install python-pandas package package: state=present name=python-pandas tags: - - packages - - web-data + - packages + - web-data - name: make sure the /usr/local/share/web-data-analysis exists file: path=/usr/local/share/web-data-analysis state=directory