nagios: Add script and check for checking that a timestamp within a file is within a delta of now, and then use this for alerting when websites stop building
Signed-off-by: Rick Elrod <relrod@redhat.com>
This commit is contained in:
parent
60b360e9e1
commit
0135fc1102
4 changed files with 66 additions and 0 deletions
43
roles/nagios_client/files/scripts/check_timestamp_from_file
Normal file
43
roles/nagios_client/files/scripts/check_timestamp_from_file
Normal file
|
@ -0,0 +1,43 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# Takes a path to a file and a delta. The file must simply contain an epoch
|
||||
# timestamp. It can be an integer or a float, as can the delta.
|
||||
#
|
||||
# Alerts critical if (now - timestamp contained in file) > delta.
|
||||
#
|
||||
# Rick Elrod <relrod@redhat.com>
|
||||
# MIT
|
||||
|
||||
import sys
|
||||
import time
|
||||
|
||||
# Standard Nagios plugin exit codes used by this check.
STATUS_OK = 0
STATUS_CRITICAL = 2
STATUS_UNKNOWN = 3


def _is_finite(value):
    """Return True if *value* is neither NaN nor +/- infinity."""
    # NaN is the only float that does not compare equal to itself.
    return value == value and abs(value) != float('inf')


def _parse_number(text):
    """Convert *text* to a finite float.

    Raises ValueError if *text* is not numeric, or parses to NaN/infinity.
    Non-finite values are rejected because every comparison against NaN is
    False, which would make the check silently report OK.
    """
    value = float(text)
    if not _is_finite(value):
        raise ValueError('not a finite number: %r' % (text,))
    return value


def check_timestamp(timestamp, delta, now):
    """Compare the age of *timestamp* against *delta*.

    timestamp -- epoch seconds read from the monitored file
    delta     -- maximum allowed age in seconds
    now       -- current epoch seconds

    Returns a (exit_code, message) tuple: CRITICAL (2) when
    (now - timestamp) > delta, otherwise OK (0).
    """
    difference = round(now - timestamp, 2)
    if difference > delta:
        return STATUS_CRITICAL, (
            'CRITICAL: Timestamp in file (%.2f) exceeds delta (%.2f) '
            'by %.2f seconds' % (timestamp, delta, difference - delta))
    return STATUS_OK, (
        'OK: Timestamp in file (%.2f) is within delta (%.2f) of now, '
        'by %.2f seconds' % (timestamp, delta, abs(difference - delta)))


def main(argv):
    """Run the check for the given argument vector and return the exit code.

    argv -- [program, path_to_file, delta]

    Prints exactly one Nagios status line. Every usage or input problem
    is reported as UNKNOWN (3) rather than surfacing as a traceback, which
    would exit with 1 and be interpreted by Nagios as WARNING.
    """
    if len(argv) != 3:
        print('UNKNOWN: Pass path to file and delta as parameters')
        return STATUS_UNKNOWN

    filename = argv[1]

    try:
        delta = _parse_number(argv[2])
    except ValueError:
        print('UNKNOWN: Delta must be a number')
        return STATUS_UNKNOWN

    try:
        with open(filename, 'r') as f:
            contents = f.read()
    except (IOError, OSError):
        print('UNKNOWN: Unable to open/read file path')
        return STATUS_UNKNOWN
    except ValueError:
        # Undecodable bytes surface as UnicodeDecodeError (a ValueError).
        print('UNKNOWN: File does not contain a valid timestamp')
        return STATUS_UNKNOWN

    try:
        timestamp = _parse_number(contents.strip())
    except ValueError:
        print('UNKNOWN: File does not contain a valid timestamp')
        return STATUS_UNKNOWN

    code, message = check_timestamp(timestamp, delta, time.time())
    print(message)
    return code


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
|
@ -47,6 +47,7 @@
|
|||
- check_osbs_api.py
|
||||
- check_ipa_replication
|
||||
- check_redis_queue.sh
|
||||
- check_timestamp_from_file
|
||||
when: not inventory_hostname.startswith('noc')
|
||||
tags:
|
||||
- nagios_client
|
||||
|
@ -226,6 +227,16 @@
|
|||
tags:
|
||||
- nagios_client
|
||||
|
||||
# Renders check_websites_buildtime.cfg.j2 into /etc/nrpe.d/ (root-owned, 0644).
# Applied only to hosts whose inventory name starts with 'sundries', and
# notifies the 'restart nrpe' handler.
- name: install nrpe checks for sundries/websites
|
||||
template: src={{ item }}.j2 dest=/etc/nrpe.d/{{ item }} owner=root group=root mode=0644
|
||||
with_items:
|
||||
- check_websites_buildtime.cfg
|
||||
when: inventory_hostname.startswith('sundries')
|
||||
notify:
|
||||
- restart nrpe
|
||||
tags:
|
||||
- nagios_client
|
||||
|
||||
- name: install nrpe config for the RabbitMQ checks
|
||||
template:
|
||||
src: "rabbitmq_args.ini.j2"
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
# Alert if websites haven't been built in 3 hours (the trailing 10800 is the
# allowed age in seconds, passed to check_timestamp_from_file as its delta).
|
||||
command[check_websites_buildtime]={{ libdir }}/nagios/plugins/check_timestamp_from_file /srv/websites/getfedora.org/build.timestamp.txt 10800
|
|
@ -316,4 +316,14 @@ define service {
|
|||
use ppc-secondarytemplate
|
||||
}
|
||||
|
||||
## Auxiliary to websites but necessary to make them happen
|
||||
|
||||
# Runs the check_websites_buildtime NRPE command on sundries01 via
# check_by_nrpe; remaining settings come from websitetemplate.
define service {
|
||||
host_name sundries01.phx2.fedoraproject.org
|
||||
service_description websites build happened recently
|
||||
check_command check_by_nrpe!check_websites_buildtime
|
||||
use websitetemplate
|
||||
}
|
||||
|
||||
|
||||
{% endif %}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue