From c8420ba58079377bfd1cc22ccf34f2cd86b36198 Mon Sep 17 00:00:00 2001
From: Adrian Reber
Date: Sun, 28 Jun 2015 10:11:05 +0000
Subject: [PATCH] Better distribution of mirrors to crawl.

The script mm2_get-highest-active-host-id used to return the highest
ID of the active mirrors. This number was divided by the number of
active crawlers and then each crawler got its share of mirrors to
crawl. This did not take into account that more active mirrors are in
the higher IDs as old mirror IDs are not re-used and thus one crawler
was getting many more mirrors to crawl than another. The new script
(which will be renamed) now divides the list correctly by returning
exactly the fraction which each crawler should crawl.
---
 .../files/mm2_get-highest-active-host-id | 29 +++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 roles/mirrormanager/crawler/files/mm2_get-highest-active-host-id

diff --git a/roles/mirrormanager/crawler/files/mm2_get-highest-active-host-id b/roles/mirrormanager/crawler/files/mm2_get-highest-active-host-id
old mode 100644
new mode 100755
index dd4299e85a..bebb77cbd6
--- a/roles/mirrormanager/crawler/files/mm2_get-highest-active-host-id
+++ b/roles/mirrormanager/crawler/files/mm2_get-highest-active-host-id
@@ -5,18 +5,32 @@ import sys
 
 import mirrormanager2.lib
 
-parser = argparse.ArgumentParser(usage=sys.argv[0] + " [options]")
+
+parser = argparse.ArgumentParser(usage=sys.argv[0] + " [options]",
+                                 formatter_class=argparse.RawTextHelpFormatter)
 parser.add_argument(
     "-c", "--config",
     dest="config", default='/etc/mirrormanager/mirrormanager2.cfg',
     help="Configuration file to use")
 
+parser.add_argument(
+    "-f", "--fraction",
+    dest="fraction", default="1:1",
+    help='''Specify which part of the mirror range should be returned
+1:1 - all mirrors
+1:2 - the first half of the mirrors
+2:3 - the middle third of the mirrors''')
+
 options = parser.parse_args()
 
 config = dict()
 with open(options.config) as config_file:
     exec(compile(config_file.read(), options.config, 'exec'), config)
 
+if ':' not in options.fraction:
+    parser.print_help()
+    sys.exit(0)
+
 session = mirrormanager2.lib.create_session(config['DB_URL'])
 
 # Get all active mirrors
@@ -29,4 +43,15 @@ hosts = [ host.id for host in hosts ]
 
 session.close()
 
-print max(hosts)
+hosts.sort()
+
+total = int(options.fraction.split(':')[1])
+part = int(options.fraction.split(':')[0])
+
+start = (part-1)*(len(hosts)/total)
+stop = (len(hosts)/total)*part
+
+if total == part:
+    print "--startid=%d" % (hosts[start])
+else:
+    print "--startid=%d --stopid=%d" % (hosts[start], hosts[stop])