This adopts the change from https://pagure.io/quick-fedora-mirror/pull-request/27 and adapts to it, so we get `imagelist` files rather than `filterlist` files (see recent commits for this). The rationale is explained more fully in that PR (and also in PR #26): on further inspection, it turns out that we have to filter out an awful lot of extensions to create small filterlists for all three modules, and I'm worried that other file extensions may appear in the future and cause the filterlists to suddenly get bigger again. Instead, we have `create-filelist` use the productmd constant that defines valid image formats, and include only files that match those formats in the list. The downside of this approach is that we have to ensure productmd is kept up to date on all the systems that run `create-filelist` whenever the list of valid image formats changes.
147 lines
5.2 KiB
Python
Executable file
147 lines
5.2 KiB
Python
Executable file
#!/usr/bin/python
|
|
from __future__ import print_function
|
|
|
|
# A simple script to generate a file list in a format easily consumable by a
|
|
# shell script.
|
|
|
|
# Originally written by Jason Tibbitts <tibbs@math.uh.edu> in 2016.
|
|
# Donated to the public domain. If you require a statement of license, please
|
|
# consider this work to be licensed as "CC0 Universal", any version you choose.
|
|
|
|
import argparse
|
|
import hashlib
|
|
import os
|
|
import sys
|
|
from scandir import scandir
|
|
|
|
# productmd is optional, needed only for the imagelist feature
|
|
try:
|
|
from productmd.images import SUPPORTED_IMAGE_FORMATS
|
|
except ImportError:
|
|
SUPPORTED_IMAGE_FORMATS = []
|
|
|
|
|
|
def get_ftype(entry):
    """Classify a directory entry as a one-letter type code.

    Returns 'l' for a symbolic link, 'd' for a directory and 'f' for
    anything else.  The symlink test is made first, so a link is reported
    as 'l' whatever it points at.
    """
    if not entry.is_symlink():
        return 'd' if entry.is_dir() else 'f'
    return 'l'
|
|
|
|
|
|
def sha1(fname):
|
|
"""Return the SHA1 checksum of a file in hex."""
|
|
fh = open(fname, 'rb')
|
|
sha1 = hashlib.sha1()
|
|
block = fh.read(2 ** 16)
|
|
while len(block) > 0:
|
|
sha1.update(block)
|
|
block = fh.read(2 ** 16)
|
|
|
|
return sha1.hexdigest()
|
|
|
|
|
|
def recursedir(path='.', skip=(), alwaysskip=('.~tmp~',)):
    """Yield every entry below path, descending into subdirectories.

    Just like scandir, but recursively.  The contents of a directory are
    yielded before the entry for the directory itself.  Symlinks are
    yielded but not followed.

    Arguments:
        path: directory to start scanning from.
        skip: entry names to ignore, but only directly inside path; it is
            not handed on to the recursive calls.
        alwaysskip: entry names to ignore at every level.

    A skipped directory is not descended into.  The defaults are tuples so
    that no mutable object is shared between calls.
    """
    for entry in scandir(path):
        if entry.name in skip or entry.name in alwaysskip:
            continue
        if entry.is_dir(follow_symlinks=False):
            # Don't pass skip here, because we only skip in the top level
            for rentry in recursedir(entry.path, alwaysskip=alwaysskip):
                yield rentry
        yield entry
|
|
|
|
|
|
def parseopts():
    """Parse the command line and return the normalised options.

    On top of the plain argparse result, some values are filled in:

    * dir falls back to '.' when it is not given.
    * checksum_files is always a list, and gains 'repomd.xml' when
      --checksum is used.
    * skip_files is always a list.  With --skip it also gains the base
      name of each list file being written (time list, file list and
      image list), unless that output is stdout or the null device.

    The file list and image list default to the null device, so they can
    be written to unconditionally.
    """
    null = open(os.devnull, 'w')
    p = argparse.ArgumentParser(
        description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror, '
                    'and (optionally) a much smaller list of only files that match one of the productmd '
                    ' supported image types, for use by fedfind.')
    p.add_argument('-c', '--checksum', action='store_true',
                   help='Include checksums of all repomd.xml files in the file list.')
    p.add_argument('-C', '--checksum-file', action='append', dest='checksum_files',
                   help='Include checksums of all instances of the specified file.')
    p.add_argument('-s', '--skip', action='store_true',
                   help='Skip the file lists in the top directory')
    p.add_argument('-S', '--skip-file', action='append', dest='skip_files',
                   help='Skip the specified file in the top directory.')

    p.add_argument('-d', '--dir', help='Directory to scan (default: .).')

    p.add_argument('-t', '--timelist', type=argparse.FileType('w'), default=sys.stdout,
                   help='Filename of the file list with times (default: stdout).')
    p.add_argument('-f', '--filelist', type=argparse.FileType('w'), default=null,
                   help='Filename of the file list without times (default: no plain file list is generated).')
    p.add_argument('-i', '--imagelist', type=argparse.FileType('w'), default=null,
                   help='Filename of the image file list for fedfind (default: not generated). Requires '
                        'the productmd library.')

    opts = p.parse_args()

    if not opts.dir:
        opts.dir = '.'

    opts.checksum_files = opts.checksum_files or []
    if opts.checksum:
        opts.checksum_files += ['repomd.xml']

    opts.skip_files = opts.skip_files or []
    if opts.skip:
        # Skip entries are compared against bare entry names, so reduce each
        # output path to its last component.  Outputs that are not real
        # files in the tree (stdout, the null device) are left out.
        for listfile in (opts.timelist, opts.filelist, opts.imagelist):
            if listfile.name not in ('<stdout>', os.devnull):
                opts.skip_files.append(os.path.basename(listfile.name))

    return opts
|
|
|
|
|
|
def main():
    """Scan the target directory and write the requested lists.

    The time list gets a '[Version]' section, a '[Files]' section with one
    tab-separated line per entry (time, type, size, path), a
    '[Checksums SHA1]' section and a closing '[End]' marker.  Every entry
    path is also written to the plain file list, and paths ending in a
    supported image format suffix are written to the image list.

    Exits with an error message if an image list was requested but no
    supported image formats are available (productmd is missing).
    """
    opts = parseopts()
    if opts.imagelist.name != os.devnull and not SUPPORTED_IMAGE_FORMATS:
        sys.exit("--imagelist requires the productmd library!")

    # The suffixes do not change during the scan, so build them once.
    # str.endswith() accepts a tuple and returns False for an empty one.
    image_suffixes = tuple('.{0}'.format(form) for form in SUPPORTED_IMAGE_FORMATS)
    checksums = set()

    os.chdir(opts.dir)

    print('[Version]', file=opts.timelist)
    print('2', file=opts.timelist)
    print(file=opts.timelist)
    print('[Files]', file=opts.timelist)

    for entry in recursedir(skip=opts.skip_files):
        print(entry.path, file=opts.filelist)
        # write to filtered list if appropriate
        if entry.path.endswith(image_suffixes):
            print(entry.path, file=opts.imagelist)

        # The scan is rooted at '.', so every path starts with './'; the
        # time list and the checksum section use the path without it.
        relpath = entry.path[2:]
        if entry.name in opts.checksum_files:
            checksums.add(relpath)

        info = entry.stat(follow_symlinks=False)
        # Use the later of mtime and ctime as the entry's time.
        modtime = max(info.st_mtime, info.st_ctime)
        size = info.st_size
        ftype = get_ftype(entry)
        print('{0}\t{1}\t{2}\t{3}'.format(modtime, ftype, size, relpath), file=opts.timelist)

    print('\n[Checksums SHA1]', file=opts.timelist)

    # It's OK if the checksum section is empty, but we should include it anyway
    # as the client expects it.
    for f in sorted(checksums):
        print('{0}\t{1}'.format(sha1(f), f), file=opts.timelist)

    print('\n[End]', file=opts.timelist)
|
|
|
|
|
|
# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|