Add two new nagios checks for the FMN "Producers"
This commit is contained in:
parent
ad9673cbae
commit
b3a97a1c91
5 changed files with 91 additions and 0 deletions
|
@ -0,0 +1,69 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import arrow
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import zmq
|
||||||
|
|
||||||
|
# Nagios plugin exit codes.
EXIT_OK = 0
EXIT_WARNING = 1
EXIT_CRITICAL = 2
EXIT_UNKNOWN = 3

# How long to wait for a status message on the monitoring socket, in ms.
POLL_TIMEOUT_MS = 20000


def classify_elapsed(diff, elapsed_warning, elapsed_critical):
    """Map the seconds elapsed since the last run to a nagios state.

    Returns an ``(exit_code, label)`` tuple.  Both thresholds are strict:
    a value exactly equal to a threshold does not trip it.  The critical
    threshold is checked first so it wins when both are exceeded.
    """
    if diff > elapsed_critical:
        return EXIT_CRITICAL, "CRITICAL"
    if diff > elapsed_warning:
        return EXIT_WARNING, "WARNING"
    return EXIT_OK, "OK"


def read_status(fname, timeout):
    """Read one JSON status message from the monitoring socket at *fname*.

    Subscribes to every message on the socket and waits up to *timeout*
    milliseconds.  Returns the decoded message, or ``None`` when nothing
    arrived in time.
    """
    # fname is absolute, so this yields "ipc:////var/run/..."; kept exactly
    # as the original built it.
    connect_to = "ipc:///%s" % fname
    ctx = zmq.Context()
    s = ctx.socket(zmq.SUB)
    s.connect(connect_to)
    # The subscription prefix must be bytes on Python 3; b'' is a plain
    # str on Python 2, so this form works on both.
    s.setsockopt(zmq.SUBSCRIBE, b'')

    poller = zmq.Poller()
    poller.register(s, zmq.POLLIN)

    events = dict(poller.poll(timeout))
    if s in events and events[s] == zmq.POLLIN:
        # Decode explicitly so json.loads gets text on every Python version.
        return json.loads(s.recv().decode('utf-8'))
    return None


def check(argv):
    """Run the check described by *argv* and return the nagios exit code.

    *argv* is ``[prog, service, producer, warning_secs, critical_secs]``.
    Prints exactly one status line.  May raise (bad integers, malformed
    status message, zmq errors); ``main`` turns those into UNKNOWN.
    """
    if len(argv) < 5:
        print("UNKNOWN - expected arguments: "
              "SERVICE PRODUCER WARNING_SECONDS CRITICAL_SECONDS")
        return EXIT_UNKNOWN

    service = argv[1]
    check_producer = argv[2]
    elapsed_warning = int(argv[3])
    elapsed_critical = int(argv[4])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    if not os.path.exists(fname):
        print("UNKNOWN - %s does not exist" % fname)
        return EXIT_UNKNOWN

    if not os.access(fname, os.W_OK):
        print("UNKNOWN - cannot write to %s" % fname)
        return EXIT_UNKNOWN

    msg = read_status(fname, POLL_TIMEOUT_MS)
    if msg is None:
        print('UNKNOWN - ZMQ timeout. No message received in %i ms'
              % POLL_TIMEOUT_MS)
        return EXIT_UNKNOWN

    now = time.time()

    # Each producer entry carries a 'name' and a 'last_ran' value; the
    # latter is subtracted from time.time(), i.e. treated as a Unix
    # timestamp in seconds.
    for prod in msg['producers']:
        if prod['name'] != check_producer:
            continue
        diff = now - prod['last_ran']
        then = arrow.get(prod['last_ran']).humanize()
        code, label = classify_elapsed(diff, elapsed_warning, elapsed_critical)
        print("%s: %s last ran %s (%i seconds ago)" % (
            label, check_producer, then, diff))
        return code

    print("UNKNOWN: fedmsg producer %s not found" % check_producer)
    return EXIT_UNKNOWN


def main(argv):
    """Entry point: run ``check`` and report any failure as UNKNOWN (3)."""
    try:
        return check(argv)
    except Exception as err:
        # Top-level boundary: nagios needs a status line and an exit code
        # rather than a traceback.
        print("UNKNOWN: %s" % str(err))
        return EXIT_UNKNOWN


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
|
@ -27,6 +27,7 @@
|
||||||
- check_fcomm_queue
|
- check_fcomm_queue
|
||||||
- check_fedmsg_consumer_backlog.py
|
- check_fedmsg_consumer_backlog.py
|
||||||
- check_fedmsg_consumer_exceptions.py
|
- check_fedmsg_consumer_exceptions.py
|
||||||
|
- check_fedmsg_producer_last_ran.py
|
||||||
- check_fedmsg_producers_consumers.py
|
- check_fedmsg_producers_consumers.py
|
||||||
- check_supybot_plugin
|
- check_supybot_plugin
|
||||||
- check_datanommer_timesince.py
|
- check_datanommer_timesince.py
|
||||||
|
|
|
@ -31,3 +31,6 @@ command[check_fedmsg_cbacklog_summershum]={{libdir}}/nagios/plugins/check_fedmsg
|
||||||
command[check_fedmsg_cbacklog_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FedoraBadgesConsumer 5000 10000
|
command[check_fedmsg_cbacklog_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FedoraBadgesConsumer 5000 10000
|
||||||
command[check_fedmsg_cbacklog_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FMNConsumer 2000 5000
|
command[check_fedmsg_cbacklog_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FMNConsumer 2000 5000
|
||||||
command[check_fedmsg_cbacklog_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py moksha-hub BugzillaConsumer 10 100
|
command[check_fedmsg_cbacklog_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py moksha-hub BugzillaConsumer 10 100
|
||||||
|
|
||||||
|
command[check_fedmsg_fmn_digest_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub DigestProducer 30 300
|
||||||
|
command[check_fedmsg_fmn_confirm_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub ConfirmationProducer 30 300
|
||||||
|
|
|
@ -410,3 +410,18 @@ define service {
|
||||||
check_command check_by_nrpe!check_fedmsg_cbacklog_bugzilla2fedmsg
|
check_command check_by_nrpe!check_fedmsg_cbacklog_bugzilla2fedmsg
|
||||||
use defaulttemplate
|
use defaulttemplate
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
define service {
|
||||||
|
host_name notifs-backend01
|
||||||
|
service_description Did the FMN digest producer run?
|
||||||
|
check_command check_by_nrpe!check_fedmsg_fmn_digest_last_ran
|
||||||
|
use defaulttemplate
|
||||||
|
}
|
||||||
|
|
||||||
|
define service {
|
||||||
|
host_name notifs-backend01
|
||||||
|
service_description Did the FMN confirmation producer run?
|
||||||
|
check_command check_by_nrpe!check_fedmsg_fmn_confirm_last_ran
|
||||||
|
use defaulttemplate
|
||||||
|
}
|
||||||
|
|
|
@ -308,6 +308,9 @@ command[check_fedmsg_cbacklog_badges_backend]=/usr/lib64/nagios/plugins/check_fe
|
||||||
command[check_fedmsg_cbacklog_notifs_backend]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FMNConsumer 10 50
|
command[check_fedmsg_cbacklog_notifs_backend]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FMNConsumer 10 50
|
||||||
command[check_fedmsg_cbacklog_bugzilla2fedmsg]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py moksha-hub BugzillaConsumer 10 100
|
command[check_fedmsg_cbacklog_bugzilla2fedmsg]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py moksha-hub BugzillaConsumer 10 100
|
||||||
|
|
||||||
|
# FMN producer freshness checks: WARNING after 30s, CRITICAL after 300s since the producer last ran.
# Uses the literal plugin path like the other commands in this file rather than the {{libdir}} template placeholder.
command[check_fedmsg_fmn_digest_last_ran]=/usr/lib64/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub DigestProducer 30 300
|
||||||
|
command[check_fedmsg_fmn_confirm_last_ran]=/usr/lib64/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub ConfirmationProducer 30 300
|
||||||
|
|
||||||
# The following are 'action commands' where by an actual action is performed
|
# The following are 'action commands' where by an actual action is performed
|
||||||
# like restarting httpd
|
# like restarting httpd
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue