nagios: Add script and check for checking that a timestamp within a file is within a delta of now, and then use this for alerting when websites stop building

Signed-off-by: Rick Elrod <relrod@redhat.com>
This commit is contained in:
Rick Elrod 2020-02-27 05:29:24 +00:00 committed by Pierre-Yves Chibon
parent 60b360e9e1
commit 0135fc1102
4 changed files with 66 additions and 0 deletions

View file

@ -0,0 +1,43 @@
#!/usr/bin/env python
# Nagios/NRPE plugin: checks that an epoch timestamp stored in a file is
# recent enough.
#
# Usage: check_timestamp_from_file <path-to-file> <delta-in-seconds>
#
# Takes a path to a file and a delta. The file must simply contain an epoch
# timestamp. It can be an integer or a float, as can the delta.
#
# Alerts critical if (now - timestamp contained in file) > delta.
#
# Exit statuses used: 0 (OK), 2 (CRITICAL), 3 (UNKNOWN).
#
# Rick Elrod <relrod@redhat.com>
# MIT
import sys
import time

EXIT_OK = 0
EXIT_CRITICAL = 2
EXIT_UNKNOWN = 3


def _read_timestamp(filename):
    """Return the epoch timestamp stored in *filename* as a float.

    Raises IOError/OSError if the file cannot be opened or read, and
    ValueError if its content is not a number.
    """
    with open(filename, 'r') as f:
        return float(f.read().strip())


def _evaluate(timestamp, delta, now):
    """Compare the age of *timestamp* against *delta*.

    Returns an ``(exit_status, message)`` tuple: CRITICAL when the
    timestamp is more than *delta* seconds older than *now*, OK otherwise.
    """
    # Rounded so the comparison agrees with the two decimals that are printed.
    difference = round(now - timestamp, 2)
    if difference > delta:
        return EXIT_CRITICAL, (
            'CRITICAL: Timestamp in file (%.2f) exceeds delta (%.2f) by %.2f seconds' % (
                timestamp,
                delta,
                difference - delta))
    return EXIT_OK, (
        'OK: Timestamp in file (%.2f) is within delta (%.2f) of now, by %.2f seconds' % (
            timestamp,
            delta,
            abs(difference - delta)))


def main(argv=None):
    """Run the check, print the one-line status and return the exit status.

    *argv* defaults to ``sys.argv`` and must hold the program name, the
    file path and the delta, in that order.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        print('UNKNOWN: Pass path to file and delta as parameters')
        return EXIT_UNKNOWN

    filename = argv[1]
    try:
        delta = float(argv[2])
    except ValueError:
        # Without this guard a bad delta produced a traceback and exit
        # status 1 instead of the UNKNOWN status (3).
        print('UNKNOWN: Delta must be a number')
        return EXIT_UNKNOWN

    try:
        timestamp = _read_timestamp(filename)
    except (IOError, OSError, ValueError):
        print('UNKNOWN: Unable to open/read file path')
        return EXIT_UNKNOWN

    status, message = _evaluate(timestamp, delta, time.time())
    print(message)
    return status


if __name__ == '__main__':
    sys.exit(main())

View file

@ -47,6 +47,7 @@
- check_osbs_api.py
- check_ipa_replication
- check_redis_queue.sh
- check_timestamp_from_file
when: not inventory_hostname.startswith('noc')
tags:
- nagios_client
@ -226,6 +227,16 @@
tags:
- nagios_client
# Renders each listed <item>.j2 template into /etc/nrpe.d/<item>, only on hosts
# whose inventory_hostname starts with 'sundries', and notifies the
# 'restart nrpe' handler when a file changes.
- name: install nrpe checks for sundries/websites
template: src={{ item }}.j2 dest=/etc/nrpe.d/{{ item }} owner=root group=root mode=0644
with_items:
- check_websites_buildtime.cfg
when: inventory_hostname.startswith('sundries')
notify:
- restart nrpe
tags:
- nagios_client
- name: install nrpe config for the RabbitMQ checks
template:
src: "rabbitmq_args.ini.j2"

View file

@ -0,0 +1,2 @@
# Alert (CRITICAL) if the websites build timestamp is more than 3 hours (10800 seconds) old
command[check_websites_buildtime]={{ libdir }}/nagios/plugins/check_timestamp_from_file /srv/websites/getfedora.org/build.timestamp.txt 10800

View file

@ -316,4 +316,14 @@ define service {
use ppc-secondarytemplate
}
## Auxiliary to websites but necessary to make them happen
# Service check that runs the 'check_websites_buildtime' NRPE command
# (via check_by_nrpe) against sundries01.
define service {
host_name sundries01.phx2.fedoraproject.org
service_description websites build happened recently
check_command check_by_nrpe!check_websites_buildtime
use websitetemplate
}
{% endif %}