ansible/files/scripts/create-filelist
Adam Williamson b99a4a6381 create-filelist: also skip the imagelist when making others
add the imagelist's name to skip_files as we do for the other
two files. tibbs is committing the same change to upstream.
2016-11-23 09:29:24 -08:00

149 lines
5.3 KiB
Python
Executable file

#!/usr/bin/python
from __future__ import print_function
# A simple script to generate a file list in a format easily consumable by a
# shell script.
# Originally written by Jason Tibbitts <tibbs@math.uh.edu> in 2016.
# Donated to the public domain. If you require a statement of license, please
# consider this work to be licensed as "CC0 Universal", any version you choose.
import argparse
import hashlib
import os
import sys
from scandir import scandir
# productmd is optional, needed only for the imagelist feature
try:
from productmd.images import SUPPORTED_IMAGE_FORMATS
except ImportError:
SUPPORTED_IMAGE_FORMATS = []
def get_ftype(entry):
    """Classify a directory entry with a one-letter type code.

    ``entry`` must offer ``is_symlink()`` and ``is_dir()`` (as scandir
    entries do).  The result is ``'l'`` for a symbolic link, ``'d'`` for a
    directory and ``'f'`` for anything else.  The symlink test comes first,
    so a link that points at a directory is still reported as ``'l'``.
    """
    if entry.is_symlink():
        kind = 'l'
    elif entry.is_dir():
        kind = 'd'
    else:
        kind = 'f'
    return kind
def sha1(fname):
    """Return the SHA1 checksum of the file ``fname`` as a hex string.

    The file is opened in binary mode and consumed in 64 KiB blocks so that
    large files are never held in memory in one piece.  The handle is closed
    before returning, even if reading raises.
    """
    digest = hashlib.sha1()
    with open(fname, 'rb') as fh:
        # iter() with a sentinel keeps calling read() until it returns b''
        # (end of file).
        for block in iter(lambda: fh.read(2 ** 16), b''):
            digest.update(block)
    return digest.hexdigest()
def recursedir(path='.', skip=[], alwaysskip=['.~tmp~']):
    """Walk ``path`` recursively, yielding every scandir entry below it.

    Entries whose name is in ``skip`` are ignored, but only directly inside
    ``path``; entries whose name is in ``alwaysskip`` are ignored at every
    depth.  A directory's contents are yielded before the directory itself.
    Directories reached through a symlink are not descended into.
    """
    for entry in scandir(path):
        name = entry.name
        if name in skip or name in alwaysskip:
            continue
        if entry.is_dir(follow_symlinks=False):
            # skip is deliberately not forwarded: it applies to the top
            # level only.
            for nested in recursedir(entry.path, alwaysskip=alwaysskip):
                yield nested
        yield entry
def parseopts():
    """Parse the command line and return the normalised options namespace.

    After parsing:

    * ``dir`` falls back to ``'.'`` when not given.
    * ``checksum_files`` is always a list; ``--checksum`` appends
      ``'repomd.xml'`` to it.
    * ``skip_files`` is always a list; ``--skip`` appends the name of each
      output file (time list, file list, image list) unless that name is
      ``'<stdout>'``.
    * ``filelist`` and ``imagelist`` default to a handle on ``os.devnull``,
      so writing to them is harmless when they were not requested.
    """
    devnull = open(os.devnull, 'w')
    parser = argparse.ArgumentParser(
        description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror, '
        'and (optionally) a much smaller list of only files that match one of the productmd '
        ' supported image types, for use by fedfind.')
    parser.add_argument('-c', '--checksum', action='store_true',
                        help='Include checksums of all repomd.xml files in the file list.')
    parser.add_argument('-C', '--checksum-file', action='append', dest='checksum_files',
                        help='Include checksums of all instances of the specified file.')
    parser.add_argument('-s', '--skip', action='store_true',
                        help='Skip the file lists in the top directory')
    parser.add_argument('-S', '--skip-file', action='append', dest='skip_files',
                        help='Skip the specified file in the top directory.')
    parser.add_argument('-d', '--dir', help='Directory to scan (default: .).')
    parser.add_argument('-t', '--timelist', type=argparse.FileType('w'), default=sys.stdout,
                        help='Filename of the file list with times (default: stdout).')
    parser.add_argument('-f', '--filelist', type=argparse.FileType('w'), default=devnull,
                        help='Filename of the file list without times (default: no plain file list is generated).')
    parser.add_argument('-i', '--imagelist', type=argparse.FileType('w'), default=devnull,
                        help='Filename of the image file list for fedfind (default: not generated). Requires '
                        'the productmd library.')

    opts = parser.parse_args()

    opts.dir = opts.dir or '.'

    # action='append' leaves the attribute as None when the flag is unused.
    opts.checksum_files = opts.checksum_files or []
    if opts.checksum:
        opts.checksum_files.append('repomd.xml')

    opts.skip_files = opts.skip_files or []
    if opts.skip:
        # The generated lists must not list themselves; stdout has no file
        # on disk to skip.
        for output in (opts.timelist, opts.filelist, opts.imagelist):
            if output.name != '<stdout>':
                opts.skip_files.append(output.name)

    return opts
def main():
    """Scan the requested directory and write the file lists.

    Exits with an error message if an image list was requested (the image
    list handle is not the ``os.devnull`` default) but no supported image
    formats are known, i.e. productmd could not be imported.

    The time list receives a ``[Version]`` section (``2``), a ``[Files]``
    section with one ``modtime<TAB>type<TAB>size<TAB>path`` line per entry,
    a ``[Checksums SHA1]`` section and a closing ``[End]`` marker.  The
    plain file list receives every entry path as scandir reports it, and
    the image list receives only the paths that end in a supported image
    format suffix.
    """
    opts = parseopts()
    if opts.imagelist.name != os.devnull and not SUPPORTED_IMAGE_FORMATS:
        sys.exit("--imagelist requires the productmd library!")

    # The suffixes do not depend on the entry, so build them once up front;
    # str.endswith accepts a tuple and returns False for an empty one.
    image_suffixes = tuple('.{0}'.format(form) for form in SUPPORTED_IMAGE_FORMATS)
    checksums = set()

    # Output files were already opened by parseopts; from here on all paths
    # are relative to the scanned directory.
    os.chdir(opts.dir)

    print('[Version]', file=opts.timelist)
    print('2', file=opts.timelist)
    print(file=opts.timelist)
    print('[Files]', file=opts.timelist)

    for entry in recursedir(skip=opts.skip_files):
        print(entry.path, file=opts.filelist)

        # write to filtered list if appropriate
        if entry.path.endswith(image_suffixes):
            print(entry.path, file=opts.imagelist)

        # Scanning starts at '.', so dropping the first two characters
        # removes the leading './' from the path.
        relpath = entry.path[2:]
        if entry.name in opts.checksum_files:
            checksums.add(relpath)

        info = entry.stat(follow_symlinks=False)
        # Use whichever of mtime/ctime is more recent.
        modtime = max(info.st_mtime, info.st_ctime)
        size = info.st_size
        ftype = get_ftype(entry)
        print('{0}\t{1}\t{2}\t{3}'.format(modtime, ftype, size, relpath), file=opts.timelist)

    print('\n[Checksums SHA1]', file=opts.timelist)
    # It's OK if the checksum section is empty, but we should include it anyway
    # as the client expects it.
    for f in sorted(checksums):
        print('{0}\t{1}'.format(sha1(f), f), file=opts.timelist)

    print('\n[End]', file=opts.timelist)
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()