Before starting the crawler, old crawler processes are killed so that stale crawlers are not left running and consuming (lots of) memory. This adapts the pgrep and pkill patterns from ^/usr/bin/python to ^/usr/bin/python2 -s.

Signed-off-by: Adrian Reber <adrian@lisas.de>
# run the crawler twice a day
# logs sent to /var/log/mirrormanager/crawler.log and crawl/* by default
#
# [ "`hostname -s`" == "mm-crawler02" ] && sleep 2h is used to start the crawl
# later on the second crawler to reduce the number of parallel accesses to
# the database
#
# To make sure only one cron-started crawler is running, any previously running
# (cron-started) crawlers are signaled to shut down. The crawler can try to
# shut down gracefully if it gets the signal SIGALRM (14). After the signal we
# wait for 5 minutes to give the crawler a chance to shut down. After that the
# crawler is killed. To make sure we only end the cron-started crawler we look
# for the following process: "/usr/bin/python2 -s /usr/bin/mm2_crawler --threads 25".
0 */12 * * * mirrormanager [ "`hostname -s`" == "mm-crawler02" ] && sleep 2h; pkill -14 -f "^/usr/bin/python2 -s /usr/bin/mm2_crawler --threads 25"; sleep 5m; pkill -9 -f "^/usr/bin/python2 -s /usr/bin/mm2_crawler --threads 25"; /usr/bin/mm2_crawler --threads 25 --timeout-minutes 180 `/usr/local/bin/run_crawler.sh 2` > /dev/null 2>&1
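
The crontab comments above rely on the crawler reacting to SIGALRM (14) by shutting down on its own before cron escalates to SIGKILL five minutes later. The sketch below is a minimal, single-threaded illustration of that pattern only, not the actual mm2_crawler code; the names request_shutdown and crawl_one_host and the example host list are invented for illustration.

#!/usr/bin/python2
# Minimal sketch of graceful shutdown on SIGALRM, as used by the cron job:
# the handler only flips a flag, and the main loop checks it between hosts
# so the process can exit cleanly before the follow-up "pkill -9".
import signal
import time

shutdown_requested = False


def request_shutdown(signum, frame):
    # Invoked when cron sends SIGALRM (signal 14) via "pkill -14 -f ...".
    global shutdown_requested
    shutdown_requested = True


signal.signal(signal.SIGALRM, request_shutdown)


def crawl_one_host(host):
    # Placeholder for the real per-host crawl work.
    time.sleep(1)


def main(hosts):
    for host in hosts:
        if shutdown_requested:
            # Stop before starting the next host; cron only waits 5 minutes
            # before killing the process outright, so exit promptly.
            break
        crawl_one_host(host)


if __name__ == "__main__":
    main(["mirror-a.example.org", "mirror-b.example.org"])

The real crawler runs with --threads 25, so its shutdown path also has to wind down worker threads; the single-threaded loop here only shows the signal-to-flag mechanism the comments describe.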