#!/usr/bin/python3
# A simple script to generate a file list in a format easily consumable by a
# shell script.

# Originally written by Jason Tibbitts in 2016.
# Donated to the public domain.  If you require a statement of license, please
# consider this work to be licensed as "CC0 Universal", any version you choose.

import argparse
import hashlib
import os
import stat
import sys

# Get scandir from whatever module provides it today
try:
    from os import scandir
except ImportError:
    from scandir import scandir

# productmd is optional, needed only for the imagelist feature
try:
    from productmd.images import SUPPORTED_IMAGE_FORMATS
except ImportError:
    SUPPORTED_IMAGE_FORMATS = []

CHECKSUM_ALGORITHMS = ("sha1", "md5", "sha256", "sha512")

# Number of bytes read per iteration while hashing a file.
_CHECKSUM_BLOCK_SIZE = 2 ** 16


class SEntry(object):
    """A simpler DirEntry-like object.

    Attributes:
        direntry: the DirEntry this object was built from.
        restricted: True when the entry lives below a restricted directory.
        path, name: copied from the DirEntry.
        modtime: the later of mtime and ctime, truncated to an int.
        readable_group, readable_world: the S_IRGRP / S_IROTH bits of the
            mode (non-zero when set).  The entry itself is examined, not the
            target of a symlink.
        size: st_size of the entry.
        ftype: 'f', 'l' or 'd', optionally followed by '*' (restricted) or
            '-' (not world readable).
    """

    def __init__(self, direntry, restricted=False):
        self.direntry = direntry
        self.restricted = restricted
        self.path = direntry.path
        self.name = direntry.name

        info = direntry.stat(follow_symlinks=False)

        # Make sure we have an int here.  Whether the stat calls give us ints
        # or floats depends on the python version, and the extra precision
        # isn't really helpful.
        self.modtime = int(max(info.st_mtime, info.st_ctime))
        self.readable_group = info.st_mode & stat.S_IRGRP
        self.readable_world = info.st_mode & stat.S_IROTH
        self.size = info.st_size

        # The symlink test comes first so that a link to a directory is
        # reported as a link, not as a directory.
        if direntry.is_symlink():
            ftype = 'l'
        elif direntry.is_dir():
            ftype = 'd'
        else:
            ftype = 'f'

        perm = '*' if self.restricted else ''

        # Note that we want an unreadable state to override the restricted
        # state
        if not self.readable_world:
            perm = '-'

        self.ftype = ftype + perm


def get_checksum(algo, fname):
    """Return the checksum of a file in hex.

    algo is the name of a hashlib constructor (e.g. 'sha256'); fname is the
    path of the file to hash.  The file is read in fixed-size blocks so large
    files are never held in memory, and it is closed before returning.
    """
    hasher = getattr(hashlib, algo)()
    with open(fname, 'rb') as fh:
        for block in iter(lambda: fh.read(_CHECKSUM_BLOCK_SIZE), b''):
            hasher.update(block)
    return hasher.hexdigest()


def recursedir(path='.', skip=(), alwaysskip=('.~tmp~',), in_restricted=False):
    """Like scandir, but recursively.

    Will skip everything in the skip sequence, but only at the top level
    directory.  Names in alwaysskip, and names starting with '.nfs', are
    skipped at every level.

    Returns SEntry objects.  If in_restricted is true, all returned entries
    will be marked as restricted even if their permissions are not restricted.
    The contents of a directory are yielded before the directory itself.
    """
    for dentry in scandir(path):
        if dentry.name in skip:
            continue
        if dentry.name in alwaysskip:
            continue
        if dentry.name.startswith('.nfs'):
            continue

        # Skip things which are not at least group readable
        # Symlinks are followed here so that clients won't see dangling
        # symlinks to content they can't transfer.  It's the default, but to
        # avoid confusion it's been made explicit.
        try:
            target_info = dentry.stat(follow_symlinks=True)
        except OSError:
            print('Could not stat {0}. Dangling symlink?'.format(dentry.name), file=sys.stderr)
            continue

        if not (target_info.st_mode & stat.S_IRGRP):
            # Not group readable: silently leave it out of the listing.
            continue

        se = SEntry(dentry, in_restricted)
        if dentry.is_dir(follow_symlinks=False):
            # A directory which is not world readable marks everything below
            # it as restricted.
            this_restricted = in_restricted or not se.readable_world

            # Don't pass skip here, because we only skip in the top level
            yield from recursedir(se.path, alwaysskip=alwaysskip,
                                  in_restricted=this_restricted)
        yield se


def write_checksum_section(algo, files, output):
    """Write a '[Checksums ALGO]' section for the given files to output.

    files is any iterable of paths; one 'checksum<TAB>path' line is written
    per path, in sorted order.
    """
    print('\n[Checksums {}]'.format(algo.upper()), file=output)

    # It's OK if the checksum section is empty, but we should include it anyway
    # as the client expects it.
    for fname in sorted(files):
        print('{0}\t{1}'.format(get_checksum(algo, fname), fname), file=output)


def _is_real_output(stream):
    """Return True if stream is an actual file, not stdout or the null device."""
    return stream is not sys.stdout and stream.name != os.devnull


def _finish_outputs(*streams):
    """Flush stdout and close every other stream so the lists are complete."""
    for stream in streams:
        if stream is sys.stdout:
            stream.flush()
        else:
            stream.close()


def parseopts():
    """Parse the command line and return the populated options namespace.

    On return, dir defaults to '.', checksum_files and skip_files are always
    lists, and timelist / filelist / imagelist are open writable streams
    (stdout or the null device when no file was requested).
    """
    null = open(os.devnull, 'w')
    p = argparse.ArgumentParser(
        description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror, '
                    'and (optionally) a much smaller list of only files that match one of the productmd '
                    ' supported image types, for use by fedfind.')
    p.add_argument('-c', '--checksum', action='store_true',
                   help='Include checksums of all repomd.xml files in the file list.')
    p.add_argument('-C', '--checksum-file', action='append', dest='checksum_files',
                   help='Include checksums of all instances of the specified file.')
    p.add_argument('-s', '--skip', action='store_true',
                   help='Skip the file lists in the top directory')
    p.add_argument('-S', '--skip-file', action='append', dest='skip_files',
                   help='Skip the specified file in the top directory.')

    p.add_argument('-d', '--dir', help='Directory to scan (default: .).')

    p.add_argument('-t', '--timelist', type=argparse.FileType('w'), default=sys.stdout,
                   help='Filename of the file list with times (default: stdout).')
    p.add_argument('-f', '--filelist', type=argparse.FileType('w'), default=null,
                   help='Filename of the file list without times (default: no plain file list is generated).')
    p.add_argument('-i', '--imagelist', type=argparse.FileType('w'), default=null,
                   help='Filename of the image file list for fedfind (default: not generated). Requires '
                        'the productmd library.')

    opts = p.parse_args()

    if not opts.dir:
        opts.dir = '.'

    opts.checksum_files = opts.checksum_files or []
    if opts.checksum:
        opts.checksum_files += ['repomd.xml']

    opts.skip_files = opts.skip_files or []
    if opts.skip:
        # Only lists written to real files can show up in the scanned tree.
        # Adding the pseudo-name of stdout or the basename of the null device
        # ('null') would hide unrelated top-level entries with those names.
        for stream in (opts.timelist, opts.filelist, opts.imagelist):
            if _is_real_output(stream):
                opts.skip_files += [os.path.basename(stream.name)]

    return opts


def main():
    """Scan the requested directory and write the requested file lists."""
    opts = parseopts()
    if opts.imagelist.name != os.devnull and not SUPPORTED_IMAGE_FORMATS:
        sys.exit("--imagelist requires the productmd library!")

    checksums = set()
    # The suffixes never change, so build them once; str.endswith accepts a
    # tuple and returns False for an empty one.
    image_suffixes = tuple('.{0}'.format(form) for form in SUPPORTED_IMAGE_FORMATS)

    os.chdir(opts.dir)

    try:
        print('[Version]', file=opts.timelist)
        # XXX Technically this should be version 3.  But old clients will
        # simply ignore the extended file types for restricted directories,
        # and so we can add this now and let things simmer for a while before
        # bumping the format and hard-breaking old clients.
        print('2', file=opts.timelist)
        print(file=opts.timelist)
        print('[Files]', file=opts.timelist)

        for entry in recursedir(skip=opts.skip_files):
            # Paths come back as './name'; the time list drops that prefix
            # while the plain and image lists keep it.
            relpath = entry.path[2:]
            print(entry.path, file=opts.filelist)

            # write to filtered list if appropriate
            if entry.path.endswith(image_suffixes):
                print(entry.path, file=opts.imagelist)
            if entry.name in opts.checksum_files:
                checksums.add(relpath)
            print('{0}\t{1}\t{2}\t{3}'.format(entry.modtime, entry.ftype,
                                              entry.size, relpath),
                  file=opts.timelist)

        for algo in CHECKSUM_ALGORITHMS:
            write_checksum_section(algo, checksums, opts.timelist)

        print('\n[End]', file=opts.timelist)
    finally:
        _finish_outputs(opts.timelist, opts.filelist, opts.imagelist)


if __name__ == '__main__':
    main()