Generate filtered file lists for fedfind to use

This adds `filterlist` files alongside the `fullfilelist` and
`fullfiletimelist` files. These are much, much shorter lists
which skip the entries for packages, ARM device tree boot files,
HTML files and directories. They are intended for consumption by
fedfind, so it can stop using rsync scraping to discover the image files
it looks for. To enable this, we update to a newer version of
`create-filelist` from upstream `quick-fedora-mirror` and make
`update-fullfiletimelist` create the filterlist files as well.

We also delete a couple of old copies of `create-filelist`;
nirik made the two roles that use it share a common copy a few
months back, but missed deleting the copy each role had in its
`files` directory.
This commit is contained in:
Adam Williamson 2016-11-18 16:34:38 -08:00
parent 77e47ec39b
commit 45d8ea3f89
4 changed files with 26 additions and 75 deletions

View file

@ -57,7 +57,9 @@ def recursedir(path='.', skip=[], alwaysskip=['.~tmp~']):
def parseopts(): def parseopts():
null = open(os.devnull, 'w') null = open(os.devnull, 'w')
p = argparse.ArgumentParser( p = argparse.ArgumentParser(
description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror.') description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror, '
'and a much smaller list with packages, Device Tree boot files, HTML files and '
'directories filtered out, for consumption by fedfind.')
p.add_argument('-c', '--checksum', action='store_true', p.add_argument('-c', '--checksum', action='store_true',
help='Include checksums of all repomd.xml files in the file list.') help='Include checksums of all repomd.xml files in the file list.')
p.add_argument('-C', '--checksum-file', action='append', dest='checksum_files', p.add_argument('-C', '--checksum-file', action='append', dest='checksum_files',
@ -73,6 +75,8 @@ def parseopts():
help='Filename of the file list with times (default: stdout).') help='Filename of the file list with times (default: stdout).')
p.add_argument('-f', '--filelist', type=argparse.FileType('w'), default=null, p.add_argument('-f', '--filelist', type=argparse.FileType('w'), default=null,
help='Filename of the file list without times (default: no plain file list is generated).') help='Filename of the file list without times (default: no plain file list is generated).')
p.add_argument('-F', '--filterlist', type=argparse.FileType('w'), default=null,
help='Filename of the filtered file list for fedfind (default: not generated).')
opts = p.parse_args() opts = p.parse_args()
@ -107,6 +111,10 @@ def main():
for entry in recursedir(skip=opts.skip_files): for entry in recursedir(skip=opts.skip_files):
# opts.filelist.write(entry.path + '\n') # opts.filelist.write(entry.path + '\n')
print(entry.path, file=opts.filelist) print(entry.path, file=opts.filelist)
# write to filtered list if appropriate
skips = ('.rpm', '.drpm', '.dtb', '.html')
if not any(entry.path.endswith(skip) for skip in skips) and not (entry.is_dir()):
print(entry.path, file=opts.filterlist)
if entry.name in opts.checksum_files: if entry.name in opts.checksum_files:
checksums[entry.path[2:]] = True checksums[entry.path[2:]] = True
info = entry.stat(follow_symlinks=False) info = entry.stat(follow_symlinks=False)

View file

@ -25,6 +25,7 @@ CREATE=/usr/local/bin/create-filelist
# context. # context.
FILELIST=fullfilelist FILELIST=fullfilelist
TIMELIST='fullfiletimelist-$mod' TIMELIST='fullfiletimelist-$mod'
FILTERLIST='filterlist-$mod'
usage () { usage () {
echo echo
@ -107,12 +108,14 @@ cd $tmpd
for mod in $MODS; do for mod in $MODS; do
currentfl=$TOPD/$mod/${FILELIST/'$mod'/$mod} currentfl=$TOPD/$mod/${FILELIST/'$mod'/$mod}
currenttl=$TOPD/$mod/${TIMELIST/'$mod'/$mod} currenttl=$TOPD/$mod/${TIMELIST/'$mod'/$mod}
currentsl=$TOPD/$mod/${FILTERLIST/'$mod'/$mod}
flname=$(basename $currentfl) flname=$(basename $currentfl)
tlname=$(basename $currenttl) tlname=$(basename $currenttl)
slname=$(basename $currentsl)
$CREATE -c -s -d $TOPD/$mod -f $flname -t $tlname $CREATE -c -s -d $TOPD/$mod -f $flname -t $tlname -F $slname
# If a file list exsts and doesn't differ from what we just generated, # If a file list exists and doesn't differ from what we just generated,
# delete the latter. # delete the latter.
if [[ -f $currentfl ]] && diff -q $currentfl $flname > /dev/null; then if [[ -f $currentfl ]] && diff -q $currentfl $flname > /dev/null; then
rm -f $flname rm -f $flname
@ -120,6 +123,9 @@ cd $tmpd
if [[ -f $currenttl ]] && diff -q $currenttl $tlname > /dev/null; then if [[ -f $currenttl ]] && diff -q $currenttl $tlname > /dev/null; then
rm -f $tlname rm -f $tlname
fi fi
if [[ -f $currentsl ]] && diff -q $currentsl $slname > /dev/null; then
rm -f $slname
fi
done done
# Now we have the new file lists but in a temporary directory which # Now we have the new file lists but in a temporary directory which
@ -128,10 +134,13 @@ cd $tmpd
for mod in $MODS; do for mod in $MODS; do
currentfl=$TOPD/$mod/${FILELIST/'$mod'/$mod} currentfl=$TOPD/$mod/${FILELIST/'$mod'/$mod}
currenttl=$TOPD/$mod/${TIMELIST/'$mod'/$mod} currenttl=$TOPD/$mod/${TIMELIST/'$mod'/$mod}
currentsl=$TOPD/$mod/${FILTERLIST/'$mod'/$mod}
flname=$(basename $currentfl) flname=$(basename $currentfl)
fldir=$(dirname $currentfl) fldir=$(dirname $currentfl)
tlname=$(basename $currenttl) tlname=$(basename $currenttl)
tldir=$(dirname $currenttl) tldir=$(dirname $currenttl)
slname=$(basename $currentsl)
sldir=$(dirname $currentsl)
if [[ -f $flname ]]; then if [[ -f $flname ]]; then
tmpf=$(mktemp -p $fldir $flname.XXXXXXXXXX) tmpf=$(mktemp -p $fldir $flname.XXXXXXXXXX)
@ -145,6 +154,12 @@ cd $tmpd
chmod 644 $tmpf chmod 644 $tmpf
mv $tmpf $currenttl mv $tmpf $currenttl
fi fi
if [[ -f $slname ]]; then
tmpf=$(mktemp -p $sldir $slname.XXXXXXXXXX)
cp -p $slname $tmpf
chmod 644 $tmpf
mv $tmpf $currentsl
fi
done done
) 9>$LOCKFILE ) 9>$LOCKFILE

View file

@ -1,36 +0,0 @@
#!/usr/bin/python
# A simple script to generate a file list in a format easily consumable by a
# shell script.
# Originally written by Jason Tibbitts <tibbs@math.uh.edu> in 2016.
# Donated to the public domain. If you require a statement of license, please
# consider this work to be licensed as "CC0 Universal", any version you choose.
from scandir import scandir
def get_ftype(entry):
    """Map a directory entry to a one-letter type code.

    Returns 'l' for a symbolic link, 'd' for a directory and 'f' for
    anything else.  The symlink test runs first, so a link is always
    reported as 'l' and is never asked whether it is a directory.
    """
    if entry.is_symlink():
        kind = 'l'
    elif entry.is_dir():
        kind = 'd'
    else:
        kind = 'f'
    return kind
def recursedir(path):
    """Walk *path* recursively, yielding scandir entries.

    Everything found inside a directory is yielded before the entry for
    that directory itself.  Directories are detected with
    ``follow_symlinks=False``, so a symlink pointing at a directory is
    yielded as a plain entry and is not descended into.
    """
    for item in scandir(path):
        if not item.is_dir(follow_symlinks=False):
            yield item
            continue
        # Real directory: emit its contents first, then the directory.
        for child in recursedir(item.path):
            yield child
        yield item
# Print one "<time> <type> <path>" line for every entry below the
# current directory.
for item in recursedir('.'):
    # Stat the entry itself rather than whatever a symlink points to.
    st = item.stat(follow_symlinks=False)
    # Report the later of the modification and change times.
    stamp = max(st.st_mtime, st.st_ctime)
    kind = get_ftype(item)
    # Drop the first two characters of the path (the './' prefix that
    # comes from scanning '.').
    relpath = item.path[2:]
    print('{} {} {}'.format(stamp, kind, relpath))

View file

@ -1,36 +0,0 @@
#!/usr/bin/python
# A simple script to generate a file list in a format easily consumable by a
# shell script.
# Originally written by Jason Tibbitts <tibbs@math.uh.edu> in 2016.
# Donated to the public domain. If you require a statement of license, please
# consider this work to be licensed as "CC0 Universal", any version you choose.
from scandir import scandir
def get_ftype(entry):
    """Map a directory entry to a one-letter type code.

    Returns 'l' for a symbolic link, 'd' for a directory and 'f' for
    anything else.  The symlink test runs first, so a link is always
    reported as 'l' and is never asked whether it is a directory.
    """
    if entry.is_symlink():
        kind = 'l'
    elif entry.is_dir():
        kind = 'd'
    else:
        kind = 'f'
    return kind
def recursedir(path):
    """Walk *path* recursively, yielding scandir entries.

    Everything found inside a directory is yielded before the entry for
    that directory itself.  Directories are detected with
    ``follow_symlinks=False``, so a symlink pointing at a directory is
    yielded as a plain entry and is not descended into.
    """
    for item in scandir(path):
        if not item.is_dir(follow_symlinks=False):
            yield item
            continue
        # Real directory: emit its contents first, then the directory.
        for child in recursedir(item.path):
            yield child
        yield item
# Print one "<time> <type> <path>" line for every entry below the
# current directory.
for item in recursedir('.'):
    # Stat the entry itself rather than whatever a symlink points to.
    st = item.stat(follow_symlinks=False)
    # Report the later of the modification and change times.
    stamp = max(st.st_mtime, st.st_ctime)
    kind = get_ftype(item)
    # Drop the first two characters of the path (the './' prefix that
    # comes from scanning '.').
    relpath = item.path[2:]
    print('{} {} {}'.format(stamp, kind, relpath))