diff --git a/roles/taskotron/imagefactory/files/imagefactory-kill-zombie b/roles/taskotron/imagefactory/files/imagefactory-kill-zombie
new file mode 100755
index 0000000000..5a5deb0c96
--- /dev/null
+++ b/roles/taskotron/imagefactory/files/imagefactory-kill-zombie
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Kill all imagefactory build processes which took longer than specified
+# (3 hours by default)
+
+DEFAULT_TIMEOUT=180
+
+# print usage
+if [ "$1" = '--help' ] || [ "$1" = '-h' ]; then
+    echo "Usage: $0 [TIMEOUT]"
+    echo -n 'Kill all imagefactory build processes which took longer than '
+    echo "TIMEOUT (in minutes, $DEFAULT_TIMEOUT by default)."
+    # help was explicitly requested, so this is not an error
+    exit 0
+fi
+
+# regular expression that pgrep -f matches against the full command line
+PROCESS_ID='/usr/bin/qemu-system-x86_64 -machine accel=kvm -name guest=factory-build'
+
+TIMEOUT=${1:-$DEFAULT_TIMEOUT}
+# Accept only a plain non-negative integer. Bash arithmetic evaluates a stray
+# word as an unset variable, i.e. 0, which would make every running build
+# look too old and get it killed.
+if ! [[ "$TIMEOUT" =~ ^[0-9]+$ ]]; then
+    echo "Invalid TIMEOUT '$TIMEOUT': expected a whole number of minutes" >&2
+    exit 2
+fi
+# convert to seconds (10# forces base 10, so 090 is not parsed as octal)
+TIMEOUT=$(( 10#$TIMEOUT * 60 ))
+
+# get PID of the oldest (pgrep -o) running matched process; pgrep fails once
+# nothing matches any more, which ends the loop
+while PID=$(pgrep -o -f "$PROCESS_ID"); do
+    # etimes is the time elapsed since the process was started, in seconds
+    AGE=$(ps -o etimes= -p "$PID")
+    # drop any whitespace padding around the number
+    AGE=${AGE//[[:space:]]/}
+    if [ -z "$AGE" ]; then
+        # the process most likely ended between pgrep and ps; bail out only
+        # if it is still around, because then ps itself is failing
+        if kill -0 "$PID" 2>/dev/null; then
+            echo "Cannot determine the age of process $PID" >&2
+            exit 1
+        fi
+        continue
+    fi
+    if (( AGE < TIMEOUT )); then
+        echo 'Some processes match, but they are not old enough, exiting'
+        exit
+    fi
+    echo "Process matched, killing: $(ps -p "$PID" --no-headers -o pid,args)"
+    kill -s TERM "$PID"
+    # wait a while and see if the process is really terminated, otherwise
+    # force kill it
+    sleep 3
+    if [ "$PID" = "$(pgrep -o -f "$PROCESS_ID")" ]; then
+        echo "Process $PID wasn't terminated, force killing it"
+        kill -s KILL "$PID"
+        sleep 1
+        # a process surviving SIGKILL cannot be removed from here; give up
+        # instead of looping over it forever
+        if [ "$PID" = "$(pgrep -o -f "$PROCESS_ID")" ]; then
+            echo "Process $PID survived SIGKILL, giving up" >&2
+            exit 1
+        fi
+    fi
+done
+echo 'No (more) processes match, exiting'
diff --git a/roles/taskotron/imagefactory/files/imagefactory-kill-zombie.service b/roles/taskotron/imagefactory/files/imagefactory-kill-zombie.service
new file mode 100644
index 0000000000..54d2825af1
--- /dev/null
+++ b/roles/taskotron/imagefactory/files/imagefactory-kill-zombie.service
@@ -0,0 +1,7 @@
+[Unit]
+Description=Kill hanging imagefactory build processes
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/imagefactory-kill-zombie
+TimeoutStartSec=300
diff --git a/roles/taskotron/imagefactory/files/imagefactory-kill-zombie.timer b/roles/taskotron/imagefactory/files/imagefactory-kill-zombie.timer
new file mode 100644
index 0000000000..9b16c003d6
--- /dev/null
+++ b/roles/taskotron/imagefactory/files/imagefactory-kill-zombie.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Kill hanging imagefactory build processes regularly
+
+[Timer]
+OnCalendar=daily
+RandomizedDelaySec=1h
+Persistent=true
+
+[Install]
+WantedBy=timers.target
diff --git a/roles/taskotron/imagefactory/tasks/main.yml b/roles/taskotron/imagefactory/tasks/main.yml
index 0636b297b7..2d97feadf2 100644
--- a/roles/taskotron/imagefactory/tasks/main.yml
+++ b/roles/taskotron/imagefactory/tasks/main.yml
@@ -13,6 +13,27 @@
 - name: copy oz.cfg config file
   copy: src=oz.cfg dest=/etc/oz/oz.cfg owner=root group=root mode=0644
 
+- name: copy imagefactory-kill-zombie script
+  copy:
+    src: imagefactory-kill-zombie
+    dest: /usr/local/bin
+    owner: root
+    group: root
+    mode: '0744'
+
+- name: copy imagefactory-kill-zombie service
+  copy:
+    src: imagefactory-kill-zombie.service
+    dest: /etc/systemd/system
+  # registered names must be valid identifiers (no dashes) to be usable in `when`
+  register: imagefactory_kill_zombie_service
+
+- name: copy imagefactory-kill-zombie timer
+  copy:
+    src: imagefactory-kill-zombie.timer
+    dest: /etc/systemd/system
+  register: imagefactory_kill_zombie_timer
+
 - name: hotfix imagefactory's REST api to allow file download
   copy: src=hotfix_imgfac_RESTv2.py dest=/usr/lib/python2.7/site-packages/imgfac/rest/RESTv2.py owner=root group=root mode=0644
 
@@ -25,11 +46,17 @@
 
 - name: reload systemd
   command: systemctl daemon-reload
-  when: imagefactory_service.changed
+  when: imagefactory_service.changed or imagefactory_kill_zombie_service.changed or imagefactory_kill_zombie_timer.changed
 
 - name: enable imagefactory
   service: name=imagefactoryd state=started enabled=yes
 
+- name: enable imagefactory-kill-zombie.timer
+  service:
+    name: imagefactory-kill-zombie.timer
+    state: started
+    enabled: yes
+
 - name: create directory for git clone
   file: path=/var/lib/fedoraqa state=directory owner=root group=root mode=1755
 