ansible/roles/distgit/files/make-new-lookaside-links
Mathieu Bridon af02e32085 distgit: Ensure the hardlinked sources all belong to apache
Without this, client uploads and downloads aren't working any more.
2015-07-15 17:20:35 +02:00

199 lines
5.5 KiB
Python
Executable file

#!/usr/bin/python
# Copyright (c) 2015 - Mathieu Bridon <bochecha@daitauha.fr>
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script. If not, see <http://www.gnu.org/licenses/>.
import argparse
import errno
from grp import getgrnam
import hashlib
import os
from pwd import getpwnam
import sys
def get_args():
    """Parse the command-line arguments of the script.

    Returns:
        The ``argparse.Namespace`` produced by the parser, carrying
        ``perform`` (bool, False unless ``--perform`` is given),
        ``link_hashtype`` ('md5' or 'sha512', default 'md5') and
        ``lookasideroot`` (the positional path argument).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--perform', action='store_true', default=False,
                        help="Actually do the hardlinking (default is to "
                             "report only)")
    # Adjacent string literals are concatenated verbatim, so the first
    # fragment needs its trailing space ("of the hardlink", not
    # "of thehardlink").
    parser.add_argument('--link-hashtype', default='md5',
                        choices=('md5', 'sha512'),
                        help='The hash type to use in the new path of the '
                             'hardlink. (default: "md5")')
    parser.add_argument('lookasideroot',
                        help="The full path to the root of the lookaside "
                             "cache")
    return parser.parse_args()
def info(msg):
    """Write ``msg`` followed by a newline to standard output.

    ``msg`` may be any object; it is rendered with ``%s``.
    """
    # Wrap in a 1-tuple so a tuple ``msg`` is printed as a tuple instead
    # of being unpacked as the format arguments.
    sys.stdout.write("%s\n" % (msg,))
def error(msg):
    """Write ``msg`` to standard error, prefixed with ``ERROR: ``.

    ``msg`` may be any object (the script also passes exceptions); it is
    rendered with ``%s``. Execution continues after the message is written.
    """
    # Wrap in a 1-tuple so a tuple ``msg`` is printed as a tuple instead
    # of being unpacked as the format arguments.
    sys.stderr.write("ERROR: %s\n" % (msg,))
def die(msg):
    """Write ``msg`` to standard error prefixed with ``FATAL: `` and exit.

    ``msg`` may be any object (the script also passes exceptions); it is
    rendered with ``%s``. This function never returns: it terminates the
    process with exit status 1 via ``sys.exit``.
    """
    # Wrap in a 1-tuple so a tuple ``msg`` is printed as a tuple instead
    # of being unpacked as the format arguments.
    sys.stderr.write("FATAL: %s\n" % (msg,))
    sys.exit(1)
def get_file_hash(full_path, hashtype, chunk_size=4096):
    """Return the hexadecimal digest of a file's content.

    Args:
        full_path: Path of the file to hash; it is opened in binary mode.
        hashtype: Name of the algorithm, passed to ``hashlib.new``
            (e.g. 'md5' or 'sha512').
        chunk_size: Number of bytes read per iteration, so the whole file
            never has to be held in memory at once. Must be at least 1.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1, or if ``hashtype``
            is not a hash name known to ``hashlib``.
    """
    if chunk_size < 1:
        # read(0) returns b'' immediately, which would silently yield the
        # digest of an empty file.
        raise ValueError("chunk_size must be >= 1, got %r" % (chunk_size,))

    digest = hashlib.new(hashtype)

    with open(full_path, 'rb') as f:
        # iter() with a sentinel keeps reading until read() returns b''.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)

    return digest.hexdigest()
def verify_source(dir, expected_name, expected_hash, hashtype):
    """Check that ``dir`` holds exactly the expected source file.

    The directory must contain a single entry, named ``expected_name``,
    whose ``hashtype`` digest equals ``expected_hash``.

    Args:
        dir: Directory expected to contain the one source file.
        expected_name: File name the single entry must have.
        expected_hash: Hexadecimal digest the file content must have.
        hashtype: Hash algorithm name used to compute the digest.

    Returns:
        The path of the verified file (``dir`` joined with
        ``expected_name``).

    Raises:
        Exception: If the directory has no entry, more than one entry, or
            an entry with the wrong name.
    """
    sources = list(listdir(dir))
    count = len(sources)

    if count == 0:
        raise Exception("No source file in %s" % dir)

    if count > 1:
        raise Exception("Multiple source files in %s: %s" % (dir, sources))

    found_name = sources[0]
    if found_name != expected_name:
        raise Exception("Badly named source file in %s: %s"
                        % (dir, found_name))

    source_path = os.path.join(dir, expected_name)
    actual_hash = get_file_hash(source_path, hashtype)

    # NOTE(review): unlike the checks above, a digest mismatch goes through
    # die(), which exits the whole process instead of raising -- confirm
    # this asymmetry is intended.
    if actual_hash != expected_hash:
        die("Invalid %s for %s: %s"
            % (hashtype.upper(), source_path, actual_hash))

    return source_path
def listdir(dir):
    """Yield the names of the entries found in ``dir``.

    This is a generator, so the directory is only read once iteration
    starts. If ``dir`` is not a directory, an error message is reported
    and nothing is yielded; any other ``OSError`` propagates to the caller.
    """
    try:
        names = os.listdir(dir)
    except OSError as e:
        if e.errno != errno.ENOTDIR:
            raise
        error("%s is not a directory" % dir)
        return

    for name in names:
        yield name
def makedirs(dir):
    """Create ``dir`` and any missing parents, tolerating an existing one.

    Args:
        dir: Path of the directory to create.

    Raises:
        OSError: If the creation fails for any reason other than ``dir``
            already existing as a directory. In particular, the error is
            re-raised when ``dir`` exists but is not a directory.
    """
    try:
        os.makedirs(dir)

    except OSError as e:
        # EEXIST is also reported when the path is an existing regular
        # file; only an existing *directory* counts as success.
        if e.errno != errno.EEXIST or not os.path.isdir(dir):
            # Bare raise keeps the original traceback.
            raise
def _same_inode(first, second):
    """Tell whether two paths are directory entries for the same inode.

    Symbolic links are not followed, so a symlink is never reported as
    being the same as its target. Returns False if either path cannot be
    stat'ed.
    """
    try:
        a = os.lstat(first)
        b = os.lstat(second)
    except OSError:
        return False

    return (a.st_dev, a.st_ino) == (b.st_dev, b.st_ino)


def hardlink(src, dst):
    """Make ``dst`` a hard link to ``src``, replacing any existing ``dst``.

    The parent directory of ``dst`` is created first when ``dst`` has one.
    If ``dst`` already is a hard link to ``src`` it is left untouched;
    otherwise an existing ``dst`` is removed and re-created as a hard link.

    Args:
        src: Path of the existing file.
        dst: Path at which the hard link must exist afterwards.

    Raises:
        OSError: If the parent directory, the removal of a stale ``dst``
            or the link itself cannot be made.
    """
    parent = os.path.dirname(dst)
    if parent:
        # A bare file name has no parent to create; os.makedirs('') fails.
        makedirs(parent)

    try:
        os.link(src, dst)

    except OSError as e:
        if e.errno != errno.EEXIST:
            # Bare raise keeps the original traceback.
            raise

        if _same_inode(src, dst):
            # Already linked (e.g. the script was run before): skip the
            # unlink/link cycle so dst never disappears, even briefly.
            return

        # The file already exists at the new-style path?
        # Overwrite it with a hardlink.
        os.unlink(dst)
        os.link(src, dst)
def _chown_path_components(path, ownerid, groupid):
    """Chown ``path`` and every leading directory of it.

    ``path`` is split on '/', and each successive prefix (first segment,
    first two segments, ... the full path) is given to
    ``ownerid``:``groupid``.
    """
    segments = path.split('/')

    for i in range(1, len(segments) + 1):
        os.chown('/'.join(segments[:i]), ownerid, groupid)


def main(root, link_hashtype, perform=False):
    """Walk the lookaside cache and hardlink old-style sources.

    The tree under ``root`` is read as ``<package>/<source name>/<entry>``.
    An entry named 'md5' or 'sha512' is a new-style hashtype directory:
    each ``<hash>`` directory below it is only verified. Any other entry is
    treated as an old-style MD5 hash directory: its source file is
    verified and then hardlinked to
    ``<package>/<source name>/<link_hashtype>/<new hash>/<source name>``.

    Verification failures are reported and the entry is skipped.

    Args:
        root: Root directory of the lookaside cache. The process changes
            its working directory to it, and all reported paths are
            relative to it.
        link_hashtype: Hash type ('md5' or 'sha512') used in the new path.
        perform: When False (the default) only report what would be done.
            When True, create the hard links and chown every component of
            each new path to the 'apache' user and group.

    Raises:
        KeyError: If ``perform`` is True and the 'apache' user or group
            does not exist.
    """
    ownerid = groupid = None

    if perform:
        # The account is only needed to chown what gets created, so a
        # report-only run works on hosts without an 'apache' account.
        ownerid = getpwnam('apache').pw_uid
        groupid = getgrnam('apache').gr_gid

    try:
        os.chdir(root)
        info("All future paths relative to %s" % root)

    except OSError as e:
        die(e)

    # We are inside root now: list the current directory rather than
    # ``root`` again, which would be resolved against itself if relative.
    for pkg_name in listdir(os.curdir):
        for source_name in listdir(pkg_name):
            source_dir = os.path.join(pkg_name, source_name)

            for entry in listdir(source_dir):
                if entry in ('md5', 'sha512'):
                    # This is not a hash, but a new-style path containing
                    # the hashtype. Let's just verify what it contains
                    hashtype_dir = os.path.join(source_dir, entry)

                    for new_style_hash in listdir(hashtype_dir):
                        try:
                            verify_source(
                                os.path.join(hashtype_dir, new_style_hash),
                                source_name, new_style_hash, entry)

                        except Exception as e:
                            error(e)

                    continue

                # This is what is used for hashes which are not under a
                # hashtype folder
                hashtype = 'md5'

                try:
                    source_path = verify_source(
                        os.path.join(source_dir, entry), source_name, entry,
                        hashtype)

                except Exception as e:
                    error(e)
                    continue

                if link_hashtype != hashtype:
                    # The new path uses another algorithm: hash again.
                    new_hash = get_file_hash(source_path, link_hashtype)
                else:
                    new_hash = entry

                new_path = os.path.join(source_dir, link_hashtype, new_hash,
                                        source_name)
                info("Hardlinking: %s to %s" % (source_path, new_path))

                if perform:
                    hardlink(source_path, new_path)
                    _chown_path_components(new_path, ownerid, groupid)
if __name__ == '__main__':
    # Script entry point: parse the command line, process the lookaside
    # cache, then exit with status 0.
    options = get_args()
    main(
        options.lookasideroot,
        options.link_hashtype,
        perform=options.perform,
    )
    sys.exit(0)