#!/usr/bin/python

# Copyright (c) 2015 - Mathieu Bridon
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script.  If not, see https://www.gnu.org/licenses/.

"""Hardlink lookaside cache sources to paths containing the hash type.

The script walks ``<root>/<package>/<source name>/<hash>/<source name>``,
checks each file against the hash found in its path, and hardlinks it to
``<root>/<package>/<source name>/<hashtype>/<hash>/<source name>``.
"""


import argparse
import errno
from grp import getgrnam
import hashlib
import os
from pwd import getpwnam
import sys


def get_args():
    """Parse the command line and return the resulting namespace."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--perform', action='store_true', default=False,
                        help="Actually do the hardlinking (default is to "
                             "report only)")
    parser.add_argument('--link-hashtype', default='md5',
                        choices=('md5', 'sha512'),
                        help='The hash type to use in the new path of the '
                             'hardlink. (default: "md5")')
    parser.add_argument('lookasideroot',
                        help="The full path to the root of the lookaside "
                             "cache")

    return parser.parse_args()


def info(msg):
    """Write an informational message to stdout."""
    sys.stdout.write("%s\n" % msg)


def error(msg):
    """Write a non-fatal error message to stderr."""
    sys.stderr.write("ERROR: %s\n" % msg)


def die(msg):
    """Write a fatal error message to stderr and exit with status 1."""
    sys.stderr.write("FATAL: %s\n" % msg)
    sys.exit(1)


def get_file_hash(full_path, hashtype):
    """Return the hex digest of the file at full_path.

    hashtype is any algorithm name accepted by hashlib.new(). The file is
    read in 4096-byte chunks so it never has to fit in memory.
    """
    hasher = hashlib.new(hashtype)

    with open(full_path, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b''):
            hasher.update(chunk)

    return hasher.hexdigest()


def verify_source(dir, expected_name, expected_hash, hashtype):
    """Check that dir holds exactly one, correctly named and hashed, file.

    Returns the path of the verified file (dir joined with expected_name).

    Raises Exception when the folder is empty, contains several entries, or
    contains a single entry with the wrong name. A hash mismatch is treated
    as fatal: it goes through die(), which terminates the script.
    """
    sources = list(listdir(dir))

    if not sources:
        raise Exception("No source file in %s" % dir)

    if len(sources) > 1:
        raise Exception("Multiple source files in %s: %s" % (dir, sources))

    if sources[0] != expected_name:
        raise Exception("Badly named source file in %s: %s"
                        % (dir, sources[0]))

    source_path = os.path.join(dir, expected_name)
    actual_hash = get_file_hash(source_path, hashtype)

    if actual_hash != expected_hash:
        die("Invalid %s for %s: %s"
            % (hashtype.upper(), source_path, actual_hash))

    return source_path


def listdir(dir):
    """Yield the entries of dir.

    If dir is not a directory, an error is reported and nothing is yielded.
    Any other OSError is propagated to the caller.
    """
    try:
        entries = os.listdir(dir)

    except OSError as e:
        if e.errno != errno.ENOTDIR:
            raise

        error("%s is not a directory" % dir)
        return

    for entry in entries:
        yield entry


def makedirs(dir):
    """Create dir and its missing parents; an existing dir is not an error."""
    try:
        os.makedirs(dir)

    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def hardlink(src, dst):
    """Make dst a hardlink to src, creating the parent folders of dst.

    If something already exists at dst it is removed and replaced by the
    hardlink.
    """
    makedirs(os.path.dirname(dst))

    try:
        os.link(src, dst)

    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

        # The file already exists at the new-style path?
        # Overwrite it with a hardlink.
        os.unlink(dst)
        os.link(src, dst)


def _chown_path_components(path, ownerid, groupid):
    """Chown path and each of its leading components to ownerid:groupid."""
    segments = path.split('/')

    for i in range(1, len(segments) + 1):
        os.chown('/'.join(segments[:i]), ownerid, groupid)


def main(root, link_hashtype, perform=False):
    """Verify every source under root and hardlink it to its new-style path.

    root is the root of the lookaside cache; the process changes its working
    directory to it. link_hashtype is the hash type used in the new path.
    With perform left to False, the hardlinks are only reported.
    """
    if perform:
        # Only the chown step needs these, so report-only runs also work on
        # hosts without an apache account. Looking them up before the walk
        # still makes a missing account fail before anything gets linked.
        ownerid = getpwnam('apache').pw_uid
        groupid = getgrnam('apache').gr_gid

    try:
        os.chdir(root)
        info("All future paths relative to %s" % root)

    except OSError as e:
        die(e)

    # We are inside root now: list the current folder rather than root
    # again, which would be resolved a second time if root were relative.
    for pkg_name in listdir(os.curdir):
        for source_name in listdir(pkg_name):
            source_dir = os.path.join(pkg_name, source_name)

            for entry in listdir(source_dir):
                if entry in ('md5', 'sha512'):
                    # This is not a hash, but a new-style path containing the
                    # hashtype. Let's just verify what it contains
                    hashtype_dir = os.path.join(source_dir, entry)

                    for source_hash in listdir(hashtype_dir):
                        try:
                            verify_source(
                                os.path.join(hashtype_dir, source_hash),
                                source_name, source_hash, entry)

                        except Exception as e:
                            error(e)

                    continue

                # This is what is used for hashes which are not under a
                # hashtype folder
                hashtype = 'md5'

                try:
                    source_path = verify_source(
                        os.path.join(source_dir, entry), source_name, entry,
                        hashtype)

                except Exception as e:
                    error(e)
                    continue

                if link_hashtype != hashtype:
                    new_hash = get_file_hash(source_path, link_hashtype)

                else:
                    new_hash = entry

                new_path = os.path.join(source_dir, link_hashtype, new_hash,
                                        source_name)
                info("Hardlinking: %s to %s" % (source_path, new_path))

                if perform:
                    hardlink(source_path, new_path)
                    _chown_path_components(new_path, ownerid, groupid)


if __name__ == '__main__':
    args = get_args()
    main(args.lookasideroot, args.link_hashtype, perform=args.perform)

    sys.exit(0)