[grobisplitter] first attempt at adding configs for system

This commit is contained in:
Stephen Smoogen 2019-06-01 17:32:14 +00:00
parent d8f5db1213
commit 6f69b370f4
6 changed files with 463 additions and 26 deletions

View file

@ -22,6 +22,9 @@
- fas_client
- collectd/base
- sudo
- { role: nfs/client, mnt_dir: '/srv/web/pub', nfs_src_dir: 'fedora_ftp/fedora.redhat.com/pub' }
- { role: nfs/client, mnt_dir: '/mnt/fedora/app', nfs_src_dir: 'fedora_app/app' }
- grobisplitter
tasks:
- import_tasks: "{{ tasks_path }}/2fa_client.yml"
@ -29,27 +32,3 @@
handlers:
- import_tasks: "{{ handlers_path }}/restart_services.yml"
# Play: install the package set grobisplitter needs to run.
- name: set up packages
  hosts: grobisplitter
  user: root
  gather_facts: true
  vars_files:
    - /srv/web/infra/ansible/vars/global.yml
    - "/srv/private/ansible/vars.yml"
    - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml
  handlers:
    - import_tasks: "{{ handlers_path }}/restart_services.yml"
  tasks:
    # Pass the list directly to the package module instead of the
    # deprecated with_items loop; block-style args instead of key=value.
    - name: install needed packages
      package:
        name:
          - rsync
          - net-tools
          - libmodulemd
          - librepo
          - python3-librepo
          - python3-repomd
          - createrepo_c
        state: present

View file

@ -0,0 +1,12 @@
The Current Master Git Repository for the grobisplitter program is
https://github.com/smooge/GrobiSplitter.git to be moved under a
Community Infrastructure repository later. The program requires
Python 3 and the following packages:
gobject-introspection
libmodulemd-2.5.0
libmodulemd1-1.8.11
librepo
python3-gobject-base
python3-hawkey
python3-librepo

View file

@ -0,0 +1,62 @@
#!/bin/bash
# Split the daily RHEL8 repository mirror into per-module repos, one
# dated tree per architecture, for koji to build against.
HOMEDIR=/mnt/fedora/app/fi-repo/rhel/rhel8/
BINDIR=/usr/local/bin
ARCHES="aarch64 ppc64le s390x x86_64"
# ISO date without the hour/timezone suffix, e.g. 2019-06-01
DATE=$(date -Ih | sed 's/+.*//')

# NOTE(review): output is written under ${HOMEDIR}/koji/${DATE} but this
# guard checks ${HOMEDIR}/${DATE} — confirm which path is intended.
if [ -d "${HOMEDIR}/${DATE}" ]; then
    echo "Directory already exists. Please remove or fix"
    exit 1
fi

# BUGFIX: was "for ARCH in ARCHES", which looped exactly once over the
# literal string "ARCHES" instead of the architecture list.
for ARCH in ${ARCHES}; do
    # The archdir is where we daily download updates for rhel8
    # BUGFIX: was ${ARCHES} (the whole list), not the current ${ARCH}.
    ARCHDIR=${HOMEDIR}/${ARCH}
    if [ ! -d "${ARCHDIR}" ]; then
        echo "Unable to find ${ARCHDIR}"
        exit 1
    fi
    # We consolidate all of the default repositories and remerge them
    # in a daily tree. This allows us to point koji at a particular
    # day if we have specific build concerns.
    # BUGFIX: was ${ARCHES}; every arch would have shared one mangled dir.
    OUTDIR=${HOMEDIR}/koji/${DATE}/${ARCH}
    mkdir -vp "${OUTDIR}"
    cd "${OUTDIR}" || { echo "Unable to enter ${OUTDIR}"; exit 1; }
    # Begin splitting the various packages into their subtrees
    ${BINDIR}/splitter.py --action hardlink --target RHEL-8-001 --create-repos ${ARCHDIR}/rhel-8-for-${ARCH}-baseos-rpms/ --only-defaults
    ${BINDIR}/splitter.py --action hardlink --target RHEL-8-002 --create-repos ${ARCHDIR}/rhel-8-for-${ARCH}-appstream-rpms/ --only-defaults
    ${BINDIR}/splitter.py --action hardlink --target RHEL-8-003 --create-repos ${ARCHDIR}/codeready-builder-for-rhel-8-${ARCH}-rpms/
    # Copy the various module trees into RHEL-8-001 where we want them
    # to work.
    cp -avlr RHEL-8-002/* RHEL-8-001
    cp -avlr RHEL-8-003/* RHEL-8-001
    # Go into the main tree
    pushd RHEL-8-001
    # Go into its non_modular subtree and update its repo as its data
    # is based off of the first split
    pushd non_modular
    createrepo -v .
    popd
    # Build out the repos we have and merge them together with
    # mergerepo -k
    repos=""
    for i in $( ls -1 ); do
        repos+="-r $i "
    done
    mergerepo_c -v -k ${repos}
    popd
    # Cleanup the trash
    rm -rf RHEL-8-002 RHEL-8-003
    # loop to the next
done

View file

@ -0,0 +1,357 @@
#!/bin/python3
# Import libraries needed for application to work
import argparse
import shutil
import gi
import gzip
import librepo
import hawkey
import tempfile
import os
import subprocess
import sys
# Look for a specific version of modulemd. The 1.x series does not
# have the tools we need.
# Look for a specific version of modulemd. The 1.x series does not
# have the tools we need.
try:
    gi.require_version('Modulemd', '2.0')
    from gi.repository import Modulemd
except (ValueError, ImportError):
    # gi.require_version raises ValueError when only a 1.x typelib is
    # installed; the import itself raises ImportError.  Exit non-zero
    # so cron/shell callers see the failure (was sys.exit(0)).
    print("We require newer versions of modulemd than installed..")
    sys.exit(1)

# Short alias used throughout the module.
mmd = Modulemd
def _get_repoinfo(directory):
    """Load yum repository metadata for a local directory via librepo.

    Uses a throw-away destination directory for librepo's working
    files and returns the LRR_YUM_REPO info dictionary.
    """
    with tempfile.TemporaryDirectory(prefix='elsplit_librepo_') as workdir:
        handle = librepo.Handle()
        handle.setopt(librepo.LRO_URLS, ["file://%s" % directory])
        handle.setopt(librepo.LRO_REPOTYPE, librepo.LR_YUMREPO)
        handle.setopt(librepo.LRO_DESTDIR, workdir)
        handle.setopt(librepo.LRO_LOCAL, True)
        handle.setopt(librepo.LRO_IGNOREMISSING, False)
        result = handle.perform()
        return result.getinfo(librepo.LRR_YUM_REPO)
def _get_hawkey_sack(repo_info):
    """Build and return a hawkey Sack from librepo repository info.

    repo_info is the LRR_YUM_REPO dict from _get_repoinfo; its
    'filelists', 'primary' and 'repomd' entries point at the metadata
    files hawkey loads.
    """
    repo = hawkey.Repo("")
    repo.filelists_fn = repo_info["filelists"]
    repo.primary_fn = repo_info["primary"]
    repo.repomd_fn = repo_info["repomd"]
    sack = hawkey.Sack()
    sack.load_repo(repo, build_cache=False)
    return sack
def _get_filelist(package_sack):
    """Map every package in the sack to its repository file location.

    Keys are "name-epoch:version-release.arch" strings, values the
    package's location attribute.  Returns a dictionary.
    """
    return {
        "%s-%s:%s-%s.%s" % (pkg.name, pkg.epoch, pkg.version,
                            pkg.release, pkg.arch): pkg.location
        for pkg in hawkey.Query(package_sack)
    }
def _parse_repository_non_modular(package_sack, repo_info, modpkgset):
    """Collect file locations of packages owned by no module.

    Walks every package in package_sack and keeps the locations that
    do not appear in modpkgset.  repo_info is accepted for interface
    symmetry but is not consulted.  Returns a set of file locations.
    """
    return {
        pkg.location
        for pkg in hawkey.Query(package_sack)
        if pkg.location not in modpkgset
    }
def _parse_repository_modular(repo_info, package_sack):
    """Index repository packages by the module stream containing them.

    Reads the gzipped modulemd document referenced by
    repo_info['modules'] and returns a dict keyed by stream NSVCA,
    each value being the list of file locations of that stream's
    artifacts that actually exist in the repository.
    """
    index = mmd.ModuleIndex()
    with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf:
        yaml_text = gzf.read().decode('utf-8')
    ok, failures = index.update_from_string(yaml_text, True)
    if len(failures) != 0:
        raise Exception("YAML FAILURE: FAILURES: %s" % failures)
    if not ok:
        raise Exception("YAML FAILURE: res != True")
    locations = _get_filelist(package_sack)
    index.upgrade_streams(2)
    contents = {}
    for name in index.get_module_names():
        module = index.get_module(name)
        for stream in module.get_all_streams():
            # Keep only artifacts that are present in the repo's filelist.
            contents[stream.get_NSVCA()] = [
                locations[pkg]
                for pkg in stream.get_rpm_artifacts()
                if pkg in locations
            ]
    return contents
def _get_modular_pkgset(mod):
"""
Takes a module and goes through the moduleset to determine which
packages are inside it.
Returns a list of packages
"""
pkgs = set()
for modcts in mod.values():
for pkg in modcts:
pkgs.add(pkg)
return list(pkgs)
def _perform_action(src, dst, action):
"""
Performs either a copy, hardlink or symlink of the file src to the
file destination.
Returns None
"""
if action == 'copy':
try:
shutil.copy(src, dst)
except FileNotFoundError:
# Missing files are acceptable: they're already checked before
# this by validate_filenames.
pass
elif action == 'hardlink':
os.link(src, dst)
elif action == 'symlink':
os.symlink(src, dst)
def validate_filenames(directory, repoinfo):
    """Verify every package listed in repoinfo exists under directory.

    repoinfo maps module names to lists of package paths relative to
    directory.  Each missing file is reported on stdout.  Returns
    True when every file exists, False otherwise.
    """
    all_present = True
    for module, packages in repoinfo.items():
        for pkg in packages:
            if not os.path.exists(os.path.join(directory, pkg)):
                all_present = False
                print("Path %s from mod %s did not exist" % (pkg, module))
    return all_present
def get_default_modules(directory):
    """Compute the set of default module streams for a repository.

    Reads the modulemd metadata for ``directory`` and returns the set
    of NSVCA names of every default stream whose (non-platform)
    runtime module dependencies can be satisfied by another default
    stream in the same repository.  Returns an empty set when the
    repository has no module metadata.
    """
    directory = os.path.abspath(directory)
    repo_info = _get_repoinfo(directory)
    provides = set()
    contents = set()
    if 'modules' not in repo_info:
        return contents
    idx = mmd.ModuleIndex()
    with gzip.GzipFile(filename=repo_info['modules'], mode='r') as gzf:
        mmdcts = gzf.read().decode('utf-8')
        res, failures = idx.update_from_string(mmdcts, True)
        if len(failures) != 0:
            raise Exception("YAML FAILURE: FAILURES: %s" % failures)
        if not res:
            raise Exception("YAML FAILURE: res != True")
    idx.upgrade_streams(2)
    # First pass: go through the default streams and build a
    # "name:stream" provides set that dependency checks consult later.
    for modname in idx.get_default_streams():
        mod = idx.get_module(modname)
        # Get the default streams and loop through them.
        stream_set = mod.get_streams_by_stream_name(
            mod.get_defaults().get_default_stream())
        for stream in stream_set:
            templist = stream.get_NSVCA().split(":")
            provides.add("%s:%s" % (templist[0], templist[1]))
    # Second pass: keep only the default streams whose runtime module
    # dependencies are all present in the provides set.
    for modname in idx.get_default_streams():
        mod = idx.get_module(modname)
        # Get the default streams and loop through them.
        stream_set = mod.get_streams_by_stream_name(
            mod.get_defaults().get_default_stream())
        for stream in stream_set:
            isprovided = True  # a variable to say this can be added.
            ourname = stream.get_NSVCA()
            # get_dependencies() is a list; observed data has a single
            # item but loop over the whole list anyway.
            for deps in stream.get_dependencies():
                # BUGFIX: loop variable renamed from 'mod' — it shadowed
                # the module object fetched above.
                for depmod in deps.get_runtime_modules():
                    # It does not seem easy to figure out what the
                    # platform is so just assume we will meet it.
                    if depmod != 'platform':
                        for stm in deps.get_runtime_streams(depmod):
                            tempstr = "%s:%s" % (depmod, stm)
                            if tempstr not in provides:
                                print("%s : %s not found." % (ourname, tempstr))
                                isprovided = False
            if isprovided:
                contents.add(ourname)
    return contents
def perform_split(repos, args, def_modules):
    """Create one directory per module under args.target and fill it.

    repos maps module names to package paths relative to
    args.repository.  When args.only_defaults is set, modules outside
    def_modules are skipped.  Each package is placed with the
    configured args.action via _perform_action.
    """
    for modname, packages in repos.items():
        if args.only_defaults and modname not in def_modules:
            continue
        moddir = os.path.join(args.target, modname)
        os.mkdir(moddir)
        for pkg in packages:
            filename = os.path.split(pkg)[1]
            _perform_action(
                os.path.join(args.repository, pkg),
                os.path.join(moddir, filename),
                args.action)
def create_repos(target, repos, def_modules, only_defaults):
    """Run createrepo_c over every emitted module directory.

    target is the split output directory; repos the module→packages
    mapping.  Modules outside def_modules are skipped when
    only_defaults is set.  Returns None.
    """
    selected = (name for name in repos
                if not only_defaults or name in def_modules)
    for modname in selected:
        subprocess.run(
            ['createrepo_c', os.path.join(target, modname), '--no-database'])
def parse_args():
    """Parse command-line options for the repository splitter.

    Returns the argparse namespace carrying repository, action,
    target, skip_missing, create_repos and only_defaults.
    """
    parser = argparse.ArgumentParser(description='Split repositories up')
    parser.add_argument('repository', help='The repository to split')
    parser.add_argument(
        '--action', help='Method to create split repos files',
        choices=('hardlink', 'symlink', 'copy'), default='hardlink')
    parser.add_argument('--target', help='Target directory for split repos')
    # The three boolean switches share the same shape; declare them in bulk.
    for flag, text in (('--skip-missing', 'Skip missing packages'),
                       ('--create-repos', 'Create repository metadatas'),
                       ('--only-defaults', 'Only output default modules')):
        parser.add_argument(flag, help=text,
                            action='store_true', default=False)
    return parser.parse_args()
def setup_target(args):
    """Ensure args.target exists as an empty directory.

    Does nothing when args.target is unset.  Normalizes the path to
    absolute, creates the directory if absent, and raises ValueError
    when it exists but is not a directory or is not empty.  Returns
    nothing.
    """
    if not args.target:
        return
    args.target = os.path.abspath(args.target)
    if not os.path.exists(args.target):
        os.mkdir(args.target)
        return
    if not os.path.isdir(args.target):
        raise ValueError("Target must be a directory")
    if os.listdir(args.target):
        raise ValueError("Target must be empty")
def parse_repository(directory):
    """
    Parse a specific directory, returning a dict with keys module NSVC's and
    values a list of package NVRs.

    The dict will also have a key "non_modular" for the non-modular packages.
    """
    directory = os.path.abspath(directory)
    repo_info = _get_repoinfo(directory)
    # Get the package sack describing every package in the repository.
    # (A dead `_get_filelist(package_sack)` call whose result was
    # discarded has been removed; _parse_repository_modular computes
    # its own filelist.)
    package_sack = _get_hawkey_sack(repo_info)
    # If we have a repository with no modules we do not want our
    # script to error out but just remake the repository with
    # everything in a known sack (aka non_modular).
    if 'modules' in repo_info:
        mod = _parse_repository_modular(repo_info, package_sack)
        modpkgset = _get_modular_pkgset(mod)
    else:
        mod = dict()
        modpkgset = set()
    mod['non_modular'] = _parse_repository_non_modular(
        package_sack, repo_info, modpkgset)
    ## We should probably go through our default modules here and
    ## remove them from our mod. This would cut down some code paths.
    return mod
def main():
    """Entry point: split a repository according to CLI options."""
    args = parse_args()
    setup_target(args)
    repos = parse_repository(args.repository)
    # Default modules only matter with --only-defaults; the synthetic
    # non_modular repo is always emitted.
    def_modules = (get_default_modules(args.repository)
                   if args.only_defaults else set())
    def_modules.add('non_modular')
    if not args.skip_missing and not validate_filenames(args.repository, repos):
        raise ValueError("Package files were missing!")
    if args.target:
        perform_split(repos, args, def_modules)
    if args.create_repos:
        create_repos(args.target, repos, def_modules, args.only_defaults)


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,27 @@
---
# Tasks to install and deploy the grobisplitter tooling.
- name: install python packages
  package:
    name:
      - createrepo_c
      - libmodulemd
      - librepo
      - python3-hawkey
      - python3-librepo
      - python3-repomd
    state: present
  tags:
    - grobi

- name: make sure that /usr/local/bin exists
  file:
    path: /usr/local/bin
    state: directory
  tags:
    - grobi

- name: copy local/bin files
  copy:
    src: "{{ item }}"
    dest: /usr/local/bin/
    mode: "0755"
  loop:
    - splitter.py
    - rhel8-split.sh
  tags:
    - grobi

## Cron job goes here.

View file

@ -2,8 +2,8 @@
- name: install python-pandas package
package: state=present name=python-pandas
tags:
- packages
- web-data
- packages
- web-data
- name: make sure the /usr/local/share/web-data-analysis exists
file: path=/usr/local/share/web-data-analysis state=directory