#!/usr/bin/python


# Copyright (c) 2015 - Mathieu Bridon
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script.  If not, see <http://www.gnu.org/licenses/>.


"""Hardlink old-style lookaside cache entries to their new-style paths.

Old-style entries live at ``<pkg>/<source_name>/<hash>/<source_name>`` and
are verified as MD5.  For each valid one, a hardlink is reported (and created
when ``--perform`` is given) at
``<pkg>/<source_name>/<link_hashtype>/<new_hash>/<source_name>``.

Entries that already sit under an ``md5`` or ``sha512`` directory are only
verified, never linked.
"""


import argparse
import errno
import hashlib
import os
import sys


# Directory names which denote a hash type rather than a hash value.
HASHTYPES = ('md5', 'sha512')


def get_args():
    """Parse the command line and return the resulting namespace."""
    parser = argparse.ArgumentParser()

    parser.add_argument('--perform', action='store_true', default=False,
                        help="Actually do the hardlinking (default is to "
                             "report only)")
    parser.add_argument('--link-hashtype', default='md5',
                        choices=HASHTYPES,
                        help='The hash type to use in the new path of the '
                             'hardlink. (default: "md5")')
    parser.add_argument('lookasideroot',
                        help="The full path to the root of the lookaside "
                             "cache")

    return parser.parse_args()


def info(msg):
    """Write an informational message to stdout."""
    sys.stdout.write("%s\n" % msg)


def error(msg):
    """Write a non-fatal error message to stderr."""
    sys.stderr.write("ERROR: %s\n" % msg)


def die(msg):
    """Write a fatal error message to stderr and exit with status 1."""
    sys.stderr.write("FATAL: %s\n" % msg)
    sys.exit(1)


def get_file_hash(full_path, hashtype):
    """Return the hex digest of the file at `full_path`.

    `hashtype` is any algorithm name accepted by ``hashlib.new``.  The file
    is read in 4096-byte chunks so that it is never fully loaded in memory.
    """
    digest = hashlib.new(hashtype)

    with open(full_path, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b''):
            digest.update(chunk)

    return digest.hexdigest()


def verify_source(dir, expected_name, expected_hash, hashtype):
    """Check that `dir` holds exactly one correctly named and hashed file.

    Returns the path to the source file (``dir/expected_name``).

    Raises Exception if the folder is empty, contains more than one entry,
    contains a file which is not named `expected_name`, or if the `hashtype`
    digest of the file differs from `expected_hash`.
    """
    sources = os.listdir(dir)

    if not sources:
        raise Exception("No source file in %s" % dir)

    if len(sources) > 1:
        raise Exception("Multiple source files in %s: %s" % (dir, sources))

    if sources[0] != expected_name:
        raise Exception("Badly named source file in %s: %s"
                        % (dir, sources[0]))

    source_path = os.path.join(dir, expected_name)
    actual_hash = get_file_hash(source_path, hashtype)

    if actual_hash != expected_hash:
        # Raise like every other validation failure, so that the caller can
        # report this file and carry on with the rest of the cache instead of
        # the whole run being aborted by one corrupt file.
        raise Exception("Invalid %s for %s: %s"
                        % (hashtype.upper(), source_path, actual_hash))

    return source_path


def makedirs(dir):
    """Create `dir` and any missing parents; an existing `dir` is fine."""
    try:
        os.makedirs(dir)

    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def hardlink(src, dst):
    """Hardlink `src` to `dst`, replacing `dst` if it already exists.

    The parent folder of `dst` is created if needed.
    """
    makedirs(os.path.dirname(dst))

    try:
        os.link(src, dst)

    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

        # The file already exists at the new-style path?
        # Overwrite it with a hardlink.
        os.unlink(dst)
        os.link(src, dst)


def _verify_new_style_dir(source_dir, source_name, hashtype):
    """Verify every source stored under ``source_dir/<hashtype>/``.

    Failures are reported with error(); nothing is linked.
    """
    hashtype_dir = os.path.join(source_dir, hashtype)

    for file_hash in os.listdir(hashtype_dir):
        try:
            verify_source(os.path.join(hashtype_dir, file_hash),
                          source_name, file_hash, hashtype)
        except Exception as e:
            error(e)


def main(root, link_hashtype, perform=False):
    """Walk the lookaside cache at `root` and hardlink old-style sources.

    The process changes its working directory to `root`; all reported paths
    are relative to it.  Each old-style source which verifies correctly is
    reported, and hardlinked to its `link_hashtype` path if `perform` is true.
    Sources which fail verification are reported on stderr and skipped.
    """
    try:
        os.chdir(root)
        info("All future paths relative to %s" % root)

    except OSError as e:
        die(e)

    # We are now inside the root: list the current folder rather than `root`,
    # which would be resolved a second time if it were a relative path.
    for pkg_name in os.listdir(os.curdir):
        for source_name in os.listdir(pkg_name):
            source_dir = os.path.join(pkg_name, source_name)

            for entry in os.listdir(source_dir):
                if entry in HASHTYPES:
                    # This is not a hash, but a new-style path containing the
                    # hashtype. Let's just verify what it contains
                    _verify_new_style_dir(source_dir, source_name, entry)
                    continue

                # This is what is used for hashes which are not under a
                # hashtype folder
                hashtype = 'md5'

                try:
                    source_path = verify_source(
                        os.path.join(source_dir, entry), source_name, entry,
                        hashtype)
                except Exception as e:
                    error(e)
                    continue

                if link_hashtype != hashtype:
                    new_hash = get_file_hash(source_path, link_hashtype)

                else:
                    new_hash = entry

                new_path = os.path.join(source_dir, link_hashtype, new_hash,
                                        source_name)
                info("Hardlinking: %s to %s" % (source_path, new_path))

                if perform:
                    hardlink(source_path, new_path)


if __name__ == '__main__':
    args = get_args()

    main(args.lookasideroot, args.link_hashtype, perform=args.perform)
    sys.exit(0)