#!/usr/bin/python

# Copyright (c) 2015 - Mathieu Bridon <bochecha@daitauha.fr>
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
import argparse
|
|
import errno
|
|
from grp import getgrnam
|
|
import hashlib
|
|
import os
|
|
from pwd import getpwnam
|
|
import sys
|
|
|
|
|
|
def get_args(argv=None):
    """Parse the command-line options of the script.

    :param argv: Optional list of argument strings to parse. When None
                 (the default) argparse reads them from ``sys.argv``.
    :returns: The parsed namespace, with the ``perform``,
              ``link_hashtype`` and ``lookasideroot`` attributes.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--perform', action='store_true', default=False,
                        help="Actually do the hardlinking (default is to "
                             "report only)")
    # Adjacent string literals are concatenated verbatim, so the first
    # fragment must carry its own trailing space.
    parser.add_argument('--link-hashtype', default='md5',
                        choices=('md5', 'sha512'),
                        help='The hash type to use in the new path of the '
                             'hardlink. (default: "md5")')
    parser.add_argument('lookasideroot',
                        help="The full path to the root of the lookaside "
                             "cache")

    return parser.parse_args(argv)
|
|
|
|
|
|
def info(msg):
    """Write msg, followed by a newline, to standard output."""
    line = "%s\n" % msg
    sys.stdout.write(line)
|
|
|
|
|
|
def error(msg):
    """Write msg as an "ERROR: " line to standard error."""
    line = "ERROR: %s\n" % msg
    sys.stderr.write(line)
|
|
|
|
|
|
def die(msg):
    """Write msg as a "FATAL: " line to standard error, then exit.

    The process exit status is 1.
    """
    line = "FATAL: %s\n" % msg
    sys.stderr.write(line)
    sys.exit(1)
|
|
|
|
|
|
def get_file_hash(full_path, hashtype):
    """Return the hexadecimal digest of the file at full_path.

    The file is opened in binary mode and fed, 4096 bytes at a time, to a
    hashlib object created from the hashtype name.
    """
    digest = hashlib.new(hashtype)

    with open(full_path, 'rb') as stream:
        # iter() with a sentinel stops on the empty read that marks EOF.
        for block in iter(lambda: stream.read(4096), b''):
            digest.update(block)

    return digest.hexdigest()
|
|
|
|
|
|
def verify_source(dir, expected_name, expected_hash, hashtype):
    """Check that dir holds exactly one correctly named and hashed file.

    Raises Exception when dir yields no entry, more than one entry, or a
    single entry that is not called expected_name. Calls die() when the
    hashtype digest of the file differs from expected_hash.

    Returns the path of the verified file (dir joined with expected_name).
    """
    sources = list(listdir(dir))
    count = len(sources)

    if count == 0:
        raise Exception("No source file in %s" % dir)

    if count > 1:
        raise Exception("Multiple source files in %s: %s" % (dir, sources))

    found_name = sources[0]

    if found_name != expected_name:
        raise Exception("Badly named source file in %s: %s"
                        % (dir, found_name))

    source_path = os.path.join(dir, expected_name)
    actual_hash = get_file_hash(source_path, hashtype)

    # NOTE(review): unlike the problems above, a digest mismatch ends the
    # whole run through die() instead of raising -- confirm this is intended.
    if actual_hash != expected_hash:
        die("Invalid %s for %s: %s"
            % (hashtype.upper(), source_path, actual_hash))

    return source_path
|
|
|
|
|
|
def listdir(dir):
    """Yield the names of the entries found in dir.

    When dir is not a directory (ENOTDIR) the problem is reported through
    error() and nothing is yielded. Any other OSError propagates.
    """
    try:
        for name in os.listdir(dir):
            yield name

    except OSError as exc:
        if exc.errno != errno.ENOTDIR:
            raise

        error("%s is not a directory" % dir)
|
|
|
|
|
|
def makedirs(dir):
    """Create dir together with any missing parent directories.

    An already existing directory is not an error.

    Raises OSError for every other failure, including the case where the
    path already exists but is not a directory.
    """
    try:
        os.makedirs(dir)

    except OSError as e:
        # EEXIST is also reported when something that is not a directory
        # (e.g. a regular file) sits at the path; only an existing
        # directory means there is nothing left to do.
        if e.errno != errno.EEXIST or not os.path.isdir(dir):
            raise
|
|
|
|
|
|
def hardlink(src, dst):
    """Make dst a hardlink to src.

    The parent directories of dst are created through makedirs() first.
    When something already exists at dst it is unlinked and replaced by
    the hardlink. Any OSError other than EEXIST propagates.
    """
    parent_dir = os.path.dirname(dst)
    makedirs(parent_dir)

    try:
        os.link(src, dst)

    except OSError as link_error:
        if link_error.errno == errno.EEXIST:
            # The file already exists at the new-style path?
            # Overwrite it with a hardlink.
            os.unlink(dst)
            os.link(src, dst)

        else:
            raise
|
|
|
|
|
|
def main(root, link_hashtype, perform=False):
    """Hardlink the old-style lookaside sources to their new-style path.

    The tree below root is walked as <package>/<source name>/<entry>.

    An entry named 'md5' or 'sha512' is a new-style hashtype directory:
    every <hash> directory below it is only verified.

    Any other entry is treated as an old-style MD5 hash directory. Its
    source is verified, then hardlinked (when perform is true) or merely
    reported (otherwise) at
    <package>/<source name>/<link_hashtype>/<new hash>/<source name>.

    Verification problems raised as exceptions are reported with error()
    and the entry is skipped.

    :param root: Path to the root of the lookaside cache.
    :param link_hashtype: Hash type used in the new-style path.
    :param perform: When true, create the hardlinks and chown every
                    component of the new path to the 'apache' user and
                    group; when false, only report.
    """
    # The account is only needed to chown what gets created, so a
    # report-only run does not require it to exist.
    ownerid = groupid = None

    if perform:
        ownerid = getpwnam('apache').pw_uid
        groupid = getgrnam('apache').gr_gid

    try:
        os.chdir(root)
        info("All future paths relative to %s" % root)

    except OSError as e:
        die(e)

    # We are now inside root: list the current directory rather than
    # root itself, which would be resolved a second time if it is relative.
    for pkg_name in listdir(os.curdir):
        for source_name in listdir(pkg_name):
            source_dir = os.path.join(pkg_name, source_name)

            for entry in listdir(source_dir):
                if entry in ('md5', 'sha512'):
                    # This is not a hash, but a new-style path containing
                    # the hashtype. Let's just verify what it contains
                    hashtype_dir = os.path.join(source_dir, entry)

                    for new_style_hash in listdir(hashtype_dir):
                        try:
                            verify_source(
                                os.path.join(hashtype_dir, new_style_hash),
                                source_name, new_style_hash, entry)

                        except Exception as e:
                            error(e)

                    continue

                # This is what is used for hashes which are not under a
                # hashtype folder
                hashtype = 'md5'

                try:
                    source_path = verify_source(
                        os.path.join(source_dir, entry), source_name, entry,
                        hashtype)

                except Exception as e:
                    error(e)
                    continue

                if link_hashtype != hashtype:
                    new_hash = get_file_hash(source_path, link_hashtype)

                else:
                    # The directory name already is the verified digest.
                    new_hash = entry

                new_path = os.path.join(source_dir, link_hashtype, new_hash,
                                        source_name)
                info("Hardlinking: %s to %s" % (source_path, new_path))

                if perform:
                    hardlink(source_path, new_path)
                    segments = new_path.split('/')

                    # Chown each leading component of the new path in turn,
                    # from the package directory down to the link itself.
                    for i in range(1, len(segments) + 1):
                        os.chown('/'.join(segments[:i]), ownerid, groupid)
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: parse the command line, process the lookaside
    # cache, then exit with status 0.
    cli_args = get_args()
    main(cli_args.lookasideroot, cli_args.link_hashtype,
         perform=cli_args.perform)

    sys.exit(0)
|