ansible/roles/repo2json/files/rhel_to_json.py
Jiri Podivin f513e7cbcd Linting python scripts
Signed-off-by: Jiri Podivin <jpodivin@redhat.com>
2024-09-18 19:57:29 +00:00

266 lines
11 KiB
Python

#!/usr/bin/env python2
"""
This script extracts the content of the primary.sqlite databases used by
RHEL and generates a big JSON out of it so that we can easily check which
packages already are in RHEL and on which arch.
requires:
sqlalchemy
lzma (only if there are .xz compressed primary.sqlite db)
"""
# These two lines are needed to run on EL6
__requires__ = ['SQLAlchemy >= 0.7']
import contextlib
import json
import os
import shutil
import tempfile
# Database related part
from sqlalchemy import Column, Integer, Text, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
BASE = declarative_base()
class Package(BASE):
''' Maps the packages table in the primary.sqlite database from
repodata to a python object.
'''
__tablename__ = 'packages'
pkgKey = Column(Integer, primary_key=True)
name = Column(Text)
rpm_sourcerpm = Column(Text)
version = Column(Text)
epoch = Column(Text)
release = Column(Text)
arch = Column(Text)
@property
def basename(self):
''' Return the base package name using the rpm_sourcerpms info. '''
return self.rpm_sourcerpm.rsplit('-', 2)[0]
# Here below we tell the script where to look for the repodata, we could
# point it to the top level, but then we would miss the differences between
# el5, 6, 7, and 8.
# I tried to create some rhel5 and rhel6 folders in which I sym-linked the
# respective el5/6 folder from the level above. The problem was that
# os.path.walk() doesn't follow links, so it would not find any repodata.
PATHS = {
'el8': [
'/mnt/fedora/app/fi-repo/rhel/rhel8/x86_64/codeready-builder-for-rhel-8-x86_64-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/x86_64/rhel-8-for-x86_64-appstream-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/x86_64/rhel-8-for-x86_64-baseos-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/aarch64/codeready-builder-for-rhel-8-aarch64-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/aarch64/rhel-8-for-aarch64-appstream-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/aarch64/rhel-8-for-aarch64-baseos-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/ppc64le/codeready-builder-for-rhel-8-ppc64le-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/ppc64le/rhel-8-for-ppc64le-appstream-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/ppc64le/rhel-8-for-ppc64le-baseos-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/s390x/codeready-builder-for-rhel-8-s390x-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/s390x/rhel-8-for-s390x-appstream-rpms/',
'/mnt/fedora/app/fi-repo/rhel/rhel8/s390x/rhel-8-for-s390x-baseos-rpms/',
],
'el7': [
'/mnt/fedora/app/fi-repo/rhel/rhel7/x86_64/rhel-7-server-extras-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/x86_64/rhel-7-server-optional-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/x86_64/rhel-7-server-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/x86_64/rhel-ha-for-rhel-7-server-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/ppc64/rhel-7-server-extras-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/ppc64/rhel-7-server-optional-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/ppc64/rhel-7-server-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/ppc64/rhel-ha-for-rhel-7-server-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/ppc64le/rhel-7-server-optional-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/ppc64le/rhel-7-server-rpms',
'/mnt/fedora/app/fi-repo/rhel/rhel7/ppc64le/rhel-ha-for-rhel-7-server-rpms',
],
'el6': [
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-ha-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-ha-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-lb-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-lb-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-optional-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-optional-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-ha-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-ha-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-lb-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-lb-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-optional-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-optional-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-ha-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-ha-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-lb-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-lb-fastrack-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-optional-6',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-optional-fastrack-6',
],
'el5': [
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-fastrack-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-productivity-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-vt-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-fastrack-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-productivity-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-vt-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-fastrack-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-productivity-5/',
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-vt-5/',
],
}
def find_primary_sqlite(paths):
''' Find all the primary.sqlite files located at or under the given
path.
'''
if not isinstance(paths, list):
paths = [paths]
files = []
for path in paths:
if not os.path.isdir(path):
continue
for (dirpath, dirnames, filenames) in os.walk(path):
for filename in filenames:
if 'primary.sqlite' in filename:
files.append(os.path.join(dirpath, filename))
return files
def decompress_primary_db(archive, location):
''' Decompress the given XZ archive at the specified location. '''
if archive.endswith('.xz'):
import lzma
with contextlib.closing(lzma.LZMAFile(archive)) as stream_xz:
data = stream_xz.read()
with open(location, 'wb') as stream:
stream.write(data)
elif archive.endswith('.gz'):
import tarfile
with tarfile.open(archive) as tar:
tar.extractall(path=location)
elif archive.endswith('.bz2'):
import bz2
with open(location, 'wb') as out:
bzar = bz2.BZ2File(archive)
out.write(bzar.read())
bzar.close()
elif archive.endswith('.sqlite'):
with open(location, 'w') as out:
with open(archive) as inp:
out.write(inp.read())
def get_pkg_info(session, pkg_name):
''' Query the sqlite database for the package specified. '''
pkg = session.query(Package).filter(Package.name == pkg_name).one()
return pkg
def main():
''' Main function, does the job :) '''
working_dir = tempfile.mkdtemp(prefix='rhel2json-')
print('working dir:', working_dir)
for el in PATHS:
output = {'packages': {}, 'arches': []}
dbfiles = find_primary_sqlite(PATHS[el])
for dbfile_xz in dbfiles:
cur_fold = os.path.join(*dbfile_xz.rsplit(os.sep, 2)[:-2])
channel = os.path.basename(cur_fold)
print('-', cur_fold)
dbfile = os.path.join(working_dir, 'primary_db_%s.sqlite' % el)
decompress_primary_db(dbfile_xz, dbfile)
if not os.path.isfile(dbfile):
print('%s was incorrectly decompressed -- ignoring' % dbfile)
continue
db_url = 'sqlite:///%s' % dbfile
db_session = sessionmaker(bind=create_engine(db_url))
session = db_session()
cnt = 0
new = 0
for pkg in session.query(Package).all():
if pkg.basename in output['packages']:
# Update the list of arches the package has
if pkg.arch not in output['packages'][
pkg.basename]['arch']:
output['packages'][pkg.basename]['arch'].append(
pkg.arch)
# Adjust the gobal list of all arches in the RHEL
if pkg.arch not in output['arches']:
output['arches'].append(pkg.arch)
# Update the list of channels the package is in
if channel not in output['packages'][
pkg.basename]['channel']:
output['packages'][pkg.basename]['channel'].append(
channel)
if channel not in output['packages'][pkg.basename]['channels']:
output['packages'][pkg.basename]['channels'][channel] = []
output['packages'][pkg.basename][
'channels'][channel].append({
'epoch': pkg.epoch,
'versions': pkg.version,
'release': pkg.release,
})
# TODO: checks if the evr is more recent or not
# (and update if it is)
else:
new += 1
output['packages'][pkg.basename] = {
'arch': [pkg.arch],
'epoch': pkg.epoch,
'version': pkg.version,
'release': pkg.release,
'channel': [channel],
'channels': {
channel: [{
'epoch': pkg.epoch,
'version': pkg.version,
'release': pkg.release,
}]
}
}
cnt += 1
print('%s packages in %s' % (cnt, cur_fold))
print('%s packages were new packages' % (new))
print('\n%s packages retrieved in %s' % (len(output['packages']), el))
print('%s arches for in %s' % (len(output['arches']), el))
outputfile = 'pkg_%s.json' % el
with open(outputfile, 'w') as stream:
stream.write(json.dumps(output))
print('Output File: %s\n' % outputfile)
# Drop the temp directory
shutil.rmtree(working_dir)
if __name__ == '__main__':
main()