Better distribution of mirrors to crawl.
The script mm2_get-highest-active-host-id used to return the highest ID of the active mirrors. This number was divided by the number of active crawlers, and each crawler then got its share of the ID range to crawl. This did not take into account that most active mirrors have higher IDs, because old mirror IDs are not reused, so one crawler was getting many more mirrors to crawl than another. The new script (which will be renamed) now divides the list of active mirrors correctly by returning exactly the fraction that each crawler should crawl.
This commit is contained in:
parent
0553afe274
commit
c8420ba580
1 changed files with 27 additions and 2 deletions
29
roles/mirrormanager/crawler/files/mm2_get-highest-active-host-id
Normal file → Executable file
29
roles/mirrormanager/crawler/files/mm2_get-highest-active-host-id
Normal file → Executable file
|
@ -5,18 +5,32 @@ import sys
|
|||
|
||||
import mirrormanager2.lib
|
||||
|
||||
parser = argparse.ArgumentParser(usage=sys.argv[0] + " [options]")
|
||||
|
||||
parser = argparse.ArgumentParser(usage=sys.argv[0] + " [options]",
|
||||
formatter_class=argparse.RawTextHelpFormatter)
|
||||
parser.add_argument(
|
||||
"-c", "--config",
|
||||
dest="config", default='/etc/mirrormanager/mirrormanager2.cfg',
|
||||
help="Configuration file to use")
|
||||
|
||||
parser.add_argument(
|
||||
"-f", "--fraction",
|
||||
dest="fraction", default="1:1",
|
||||
help='''Specify which part of the mirror range should be returned
|
||||
1:1 - all mirrors
|
||||
1:2 - the first half of the mirrors
|
||||
2:3 - the middle third of the mirrors''')
|
||||
|
||||
options = parser.parse_args()
|
||||
|
||||
config = dict()
|
||||
with open(options.config) as config_file:
|
||||
exec(compile(config_file.read(), options.config, 'exec'), config)
|
||||
|
||||
if ':' not in options.fraction:
|
||||
parser.print_help()
|
||||
sys.exit(0)
|
||||
|
||||
session = mirrormanager2.lib.create_session(config['DB_URL'])
|
||||
|
||||
# Get all active mirrors
|
||||
|
@ -29,4 +43,15 @@ hosts = [ host.id for host in hosts ]
|
|||
|
||||
session.close()
|
||||
|
||||
print max(hosts)
|
||||
# Sort the active host IDs so that equal-sized slices of the list map to
# contiguous ID ranges.
hosts.sort()

# --fraction has the form PART:TOTAL, meaning "share number PART (1-based)
# out of TOTAL equally sized shares of the active mirrors".
try:
    part, total = [int(field) for field in options.fraction.split(':')]
except ValueError:
    # Raised for non-numeric fields and for anything but exactly two fields.
    sys.stderr.write(
        "Invalid fraction '%s': expected PART:TOTAL, e.g. 1:2\n"
        % options.fraction)
    sys.exit(1)

if total < 1 or not 1 <= part <= total:
    # Without this check total == 0 divides by zero and an out-of-range
    # part indexes past the end (or silently from the end) of the list.
    sys.stderr.write(
        "Invalid fraction '%s': need 1 <= PART <= TOTAL\n"
        % options.fraction)
    sys.exit(1)

if not hosts:
    # Nothing to distribute; indexing the empty list below would raise.
    sys.stderr.write("No active mirrors found\n")
    sys.exit(1)

# Floor division keeps the indexes integral on both Python 2 and Python 3.
chunk = len(hosts) // total
start = (part - 1) * chunk
stop = part * chunk

if part == total:
    # The last share has no upper bound, so it also covers the remainder
    # left over when len(hosts) is not a multiple of total.
    print("--startid=%d" % hosts[start])
else:
    # NOTE(review): hosts[stop] is the first ID of the next share; confirm
    # that the crawler treats --stopid as an exclusive bound.
    print("--startid=%d --stopid=%d" % (hosts[start], hosts[stop]))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue