2014-12-08 16:17:09 +01:00
|
|
|
# run the crawler twice a day
|
|
|
|
# logs sent to /var/log/mirrormanager/crawler.log and crawl/* by default
|
2015-05-13 11:23:21 +00:00
|
|
|
#
|
2015-06-28 10:23:43 +00:00
|
|
|
# The "mm-crawler02" hostname check followed by "sleep 2h" is used to start the crawl
|
2015-05-13 11:23:21 +00:00
|
|
|
# later on the second crawler to reduce the number of parallel accesses to
|
|
|
|
# the database
|
2016-07-13 10:00:42 +02:00
|
|
|
#
|
|
|
|
# To make sure only one cron-started crawler is running, the previously running
|
|
|
|
# (cron) crawlers are being signaled to shut down. The crawler can try to
|
|
|
|
# shut down gracefully if it gets the signal SIGALRM (14). After the signal we
|
|
|
|
# wait for 5 minutes to give the crawler a chance to shut down. After that the
|
|
|
|
# crawler is killed. To make sure we only end the cron started crawler we look
|
|
|
|
# for the following process "/usr/bin/python2 -s /usr/bin/mm2_crawler --threads 25".
|
2016-12-31 12:34:18 +01:00
|
|
|
# Runs at 00:00 and 12:00 as user mirrormanager. Steps, in order:
#   1. on host mm-crawler02 only, sleep 2h before continuing;
#   2. send signal 14 to any process matching the cron crawler command line;
#   3. sleep 5 minutes, then send signal 9 to any that are still matching;
#   4. start a new crawler with its stdout/stderr discarded.
# The hostname test uses the POSIX "=" comparison (not the bash-only "==")
# because cron runs commands with /bin/sh unless SHELL is set otherwise.
0 */12 * * * mirrormanager [ "`hostname -s`" = "mm-crawler02" ] && sleep 2h; pkill -14 -f "^/usr/bin/python2 -s /usr/bin/mm2_crawler --threads 25"; sleep 5m; pkill -9 -f "^/usr/bin/python2 -s /usr/bin/mm2_crawler --threads 25"; /usr/bin/mm2_crawler --threads 25 --timeout-minutes 180 `/usr/local/bin/run_crawler.sh 2` > /dev/null 2>&1
|