From 68348e180d2339aa942845269310d05a1fd3a03f Mon Sep 17 00:00:00 2001
From: Mikolaj Izdebski
Date: Tue, 24 Sep 2019 11:31:30 +0200
Subject: [PATCH] Koschei: Enable health checks (watchdog)

---
Review notes (text in this section is ignored by `git am`):
- Probe commands are wrapped in `exec:`. A Kubernetes/OpenShift probe has no
  top-level `command` field; the handler must be given as `exec.command`.
- The diffstat and the second backend-deployment.yml hunk header are adjusted
  for the two added `exec:` lines; `index` hashes are left as in the original.
- This patch was recovered from a whitespace-collapsed copy: the leading
  indentation of context and added lines is reconstructed and must be checked
  against the target tree before applying.

 .../koschei/templates/backend-deployment.yml  | 11 +++++++++++
 .../koschei/templates/config-backend.cfg.j2   |  8 +++++++-
 roles/openshift-apps/koschei/vars/main.yml    |  3 +++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/roles/openshift-apps/koschei/templates/backend-deployment.yml b/roles/openshift-apps/koschei/templates/backend-deployment.yml
index 8470fefd23..fd8fd2a642 100644
--- a/roles/openshift-apps/koschei/templates/backend-deployment.yml
+++ b/roles/openshift-apps/koschei/templates/backend-deployment.yml
@@ -27,6 +27,8 @@ spec:
           value: /etc/krb5.keytab
         - name: FEDORA_MESSAGING_CONF
           value: /etc/koschei/fedora-messaging.toml
+        - name: WATCHDOG_PATH
+          value: /var/run/koschei-watchdog
         volumeMounts:
         - name: config
           mountPath: /etc/koschei
@@ -42,6 +44,15 @@ spec:
           limits:
             cpu: "{{ max_cpu }}m"
             memory: "{{ max_mem }}Mi"
+        readinessProbe:
+          initialDelaySeconds: 5
+          exec:
+            command: ["/bin/bash", "-c", "[[ -e /var/run/koschei-watchdog ]]"]
+        livenessProbe:
+          initialDelaySeconds: 30
+          periodSeconds: 5
+          exec:
+            command: ["/bin/bash", "-c", "(($(stat -c%Y /var/run/koschei-watchdog) + {{ koschei_watchdog_timeout }} > $(date +%s)))"]
       volumes:
       - name: config
         secret:
diff --git a/roles/openshift-apps/koschei/templates/config-backend.cfg.j2 b/roles/openshift-apps/koschei/templates/config-backend.cfg.j2
index 38d017587d..bea0b11fea 100644
--- a/roles/openshift-apps/koschei/templates/config-backend.cfg.j2
+++ b/roles/openshift-apps/koschei/templates/config-backend.cfg.j2
@@ -46,16 +46,22 @@ config = {
 {% else %}
             "interval": 20*60, # in seconds
 {% endif %}
+            "watchdog": True,
         },
         "build_resolver": {
             "memory_limit": 1024**2, # kilobytes
+            "watchdog": True,
         },
         "repo_resolver": {
             "memory_limit": 1024**2, # kilobytes
+            "watchdog": True,
         },
         "watcher": {
             "memory_limit": 256*1024, # kilobytes
-            "watchdog": False,
+            "watchdog": True,
+        },
+        "scheduler": {
+            "watchdog": True,
         },
     },
     "priorities": {
diff --git a/roles/openshift-apps/koschei/vars/main.yml b/roles/openshift-apps/koschei/vars/main.yml
index 34c7e823e6..24563fc6a8 100644
--- a/roles/openshift-apps/koschei/vars/main.yml
+++ b/roles/openshift-apps/koschei/vars/main.yml
@@ -3,3 +3,6 @@ description: koschei
 appowners:
 - mizdebsk
 - msimacek
+
+# Time in seconds after which backend pod is considered to be dead
+koschei_watchdog_timeout: 120