diff --git a/roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py b/roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py
index 918ced6914..92bc2b604f 100644
--- a/roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py
+++ b/roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py
@@ -9,34 +9,48 @@ import zmq
 try:
     service = sys.argv[1]
     check_list = frozenset(sys.argv[2:])
+    fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+
     if not check_list:
         print "UNKNOWN - empty list of fedmsg consumers and producers to check"
         sys.exit(3)
-    uninitialized_cp = False
-    fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+
     if not os.path.exists(fname):
         print "UNKNOWN - %s does not exist" % fname
         sys.exit(3)
+
     connect_to = "ipc:///%s" % fname
     ctx = zmq.Context()
     s = ctx.socket(zmq.SUB)
     s.connect(connect_to)
     s.setsockopt(zmq.SUBSCRIBE, '')
-    msg = s.recv()
-    msg = json.loads(msg)
+    poller = zmq.Poller()
+    poller.register(s, zmq.POLLIN)
+
+    timeout = 10000
+
+    events = dict(poller.poll(timeout))
+    if s in events and events[s] == zmq.POLLIN:
+        msg = s.recv()
+        msg = json.loads(msg)
+    else:
+        print 'UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout
+        sys.exit(3)
 
     for consumer in msg['consumers']:
         if consumer['name'] in check_list and not consumer['initialized']:
             print 'ERROR: fedmsg consumer %s is not initialized' % consumer['name']
-            uninitialized_cp = True
+            sys.exit(2)
 
     for producer in msg['producers']:
         if producer['name'] in check_list and not producer['initialized']:
             print 'ERROR: fedmsg producer %s is not initialized' % producer['name']
-            uninitialized_cp = True
+            sys.exit(2)
 
-    if uninitialized_cp:
-        sys.exit(2)
+    for item in check_list:
+        if item not in [p['name'] for p in msg['producers'] + msg['consumers']]:
+            print 'ERROR: %s not found among installed plugins' % item
+            sys.exit(2)
 
     print "OK: fedmsg consumer(s) and producer(s) initialized"
     sys.exit(0)