Add the repo2json role
This commit is contained in:
parent
f6972b7476
commit
e6e33204af
3 changed files with 272 additions and 0 deletions
239
roles/repo2json/files/rhel_to_json.py
Normal file
239
roles/repo2json/files/rhel_to_json.py
Normal file
|
@ -0,0 +1,239 @@
|
|||
#!/usr/bin/env python2
|
||||
|
||||
"""
|
||||
This script extracts the content of the primary.sqlite databases used by
|
||||
RHEL and generates a big JSON out of it so that we can easily check which
|
||||
packages already are in RHEL and on which arch.
|
||||
|
||||
requires:
|
||||
sqlalchemy
|
||||
lzma (only if there are .xz compressed primary.sqlite db)
|
||||
|
||||
"""
|
||||
|
||||
# These two lines are needed to run on EL6
|
||||
__requires__ = ['SQLAlchemy >= 0.7']
|
||||
import pkg_resources
|
||||
|
||||
|
||||
import contextlib
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
|
||||
# Database related part
|
||||
|
||||
from sqlalchemy import Column, ForeignKey, Integer, Text, create_engine
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
|
||||
BASE = declarative_base()
|
||||
|
||||
|
||||
class Package(BASE):
|
||||
''' Maps the packages table in the primary.sqlite database from
|
||||
repodata to a python object.
|
||||
'''
|
||||
__tablename__ = 'packages'
|
||||
pkgKey = Column(Integer, primary_key=True)
|
||||
name = Column(Text)
|
||||
rpm_sourcerpm = Column(Text)
|
||||
version = Column(Text)
|
||||
epoch = Column(Text)
|
||||
release = Column(Text)
|
||||
arch = Column(Text)
|
||||
|
||||
@property
|
||||
def basename(self):
|
||||
''' Return the base package name using the rpm_sourcerpms info. '''
|
||||
return self.rpm_sourcerpm.rsplit('-', 2)[0]
|
||||
|
||||
|
||||
# Here below we tell the script where to look for the repodata, we could
|
||||
# point it to the top level, but then we would miss the differences between
|
||||
# el5, 6 and 7.
|
||||
# I tried to create some rhel5 and rhel6 folders in which I sym-linked the
|
||||
# respective el5/6 folder from the level above. The problem was that
|
||||
# os.path.walk() doesn't follow links, so it would not find any repodata.
|
||||
|
||||
PATHS = {
|
||||
'el7': [
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel7/',
|
||||
],
|
||||
'el6': [
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-ha-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-ha-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-lb-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-lb-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-optional-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-optional-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-rs-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-rs-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-ha-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-lb-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-optional-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc64-server-optional-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6-ost-preview',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6-rhevh',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6-rhevm-3',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-6-rhs-rhsc-2.0',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-ha-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-ha-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-lb-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-optional-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-optional-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-lb-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-rhsclient-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-rs-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-rs-fastrack-6',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-sfs-6',
|
||||
],
|
||||
'el5': [
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-grid-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-grid-execute-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-management-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-messaging-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-messaging-base-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-5-mrg-realtime-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-cluster-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-cluster-storage-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-fastrack-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-productivity-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-i386-server-vt-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-5-mrg-messaging-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-5-mrg-messaging-base-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-cluster-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-cluster-storage-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-fastrack-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-productivity-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-ppc-server-vt-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-grid-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-grid-execute-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-management-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-messaging-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-messaging-base-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-5-mrg-realtime-1/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-fastrack-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-productivity-5/',
|
||||
'/mnt/fedora/app/fi-repo/rhel/rhel-x86_64-server-vt-5/',
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def find_primary_sqlite(paths):
|
||||
''' Find all the primary.sqlite files located at or under the given
|
||||
path.
|
||||
'''
|
||||
if not isinstance(paths, list):
|
||||
paths = [paths]
|
||||
files = []
|
||||
for path in paths:
|
||||
if not os.path.isdir(path):
|
||||
continue
|
||||
for (dirpath, dirnames, filenames) in os.walk(path):
|
||||
for filename in filenames:
|
||||
if 'primary.sqlite' in filename:
|
||||
files.append(os.path.join(dirpath, filename))
|
||||
return files
|
||||
|
||||
|
||||
def decompress_primary_db(archive, location):
|
||||
''' Decompress the given XZ archive at the specified location. '''
|
||||
if archive.endswith('.xz'):
|
||||
import lzma
|
||||
with contextlib.closing(lzma.LZMAFile(archive)) as stream_xz:
|
||||
data = stream_xz.read()
|
||||
with open(location, 'wb') as stream:
|
||||
stream.write(data)
|
||||
elif archive.endswith('.gz'):
|
||||
import tarfile
|
||||
with tarfile.open(archive) as tar:
|
||||
tar.extractall(path=location)
|
||||
elif archive.endswith('.bz2'):
|
||||
import bz2
|
||||
with open(location, 'w') as out:
|
||||
bzar = bz2.BZ2File(archive)
|
||||
out.write(bzar.read())
|
||||
bzar.close()
|
||||
elif archive.endswith('.sqlite'):
|
||||
with open(location, 'w') as out:
|
||||
with open(archive) as inp:
|
||||
out.write(inp.read())
|
||||
|
||||
|
||||
def get_pkg_info(session, pkg_name):
|
||||
''' Query the sqlite database for the package specified. '''
|
||||
pkg = session.query(Package).filter(Package.name == pkg_name).one()
|
||||
return pkg
|
||||
|
||||
|
||||
def main():
|
||||
''' Main function, does the job :) '''
|
||||
working_dir = tempfile.mkdtemp(prefix='rhel2json-')
|
||||
print 'working dir:', working_dir
|
||||
|
||||
for el in PATHS:
|
||||
|
||||
output = {}
|
||||
|
||||
dbfiles = find_primary_sqlite(PATHS[el])
|
||||
|
||||
for dbfile_xz in dbfiles:
|
||||
cur_fold = os.path.join(*dbfile_xz.rsplit(os.sep, 2)[:-2])
|
||||
print '-', cur_fold
|
||||
dbfile = os.path.join(working_dir, 'primary_db_%s.sqlite' % el)
|
||||
decompress_primary_db(dbfile_xz, dbfile)
|
||||
|
||||
if not os.path.isfile(dbfile):
|
||||
print '%s was incorrectly decompressed -- ignoring' % dbfile
|
||||
continue
|
||||
|
||||
db_url = 'sqlite:///%s' % dbfile
|
||||
db_session = sessionmaker(bind=create_engine(db_url))
|
||||
session = db_session()
|
||||
|
||||
cnt = 0
|
||||
new = 0
|
||||
for pkg in session.query(Package).all():
|
||||
if pkg.basename in output:
|
||||
if pkg.arch not in output[pkg.basename]['arch']:
|
||||
output[pkg.basename]['arch'].append(pkg.arch)
|
||||
# TODO: checks if the evr is more recent or not
|
||||
# (and update if it is)
|
||||
else:
|
||||
new += 1
|
||||
output[pkg.basename] = {
|
||||
'arch': [pkg.arch],
|
||||
'epoch': pkg.epoch,
|
||||
'version': pkg.version,
|
||||
'release': pkg.release,
|
||||
}
|
||||
cnt += 1
|
||||
print '%s packages in %s' % (cnt, cur_fold)
|
||||
print '%s packages were new packages' % (new)
|
||||
|
||||
print '\n%s packages retrieved in %s' % (len(output), el)
|
||||
outputfile = 'pkg_%s.json' % el
|
||||
with open(outputfile, 'w') as stream:
|
||||
stream.write(json.dumps(output))
|
||||
print 'Output File: %s\n' % outputfile
|
||||
|
||||
# Drop the temp directory
|
||||
shutil.rmtree(working_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Add table
Add a link
Reference in a new issue