diff --git a/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py b/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py
index 75ff4333fe..0b9da670ea 100644
--- a/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py
+++ b/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py
@@ -12,16 +12,31 @@ try:
     backlog_warning = int(sys.argv[3])
     backlog_critical = int(sys.argv[4])
     fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+
     if not os.path.exists(fname):
         print "UNKNOWN - %s does not exist" % fname
         sys.exit(3)
+
     connect_to = "ipc:///%s" % fname
     ctx = zmq.Context()
     s = ctx.socket(zmq.SUB)
     s.connect(connect_to)
     s.setsockopt(zmq.SUBSCRIBE, '')
-    msg = s.recv()
-    msg = json.loads(msg)
+
+    # Poll with a timeout (in milliseconds) so the check reports UNKNOWN
+    # instead of blocking forever in recv() when nothing is published.
+    poller = zmq.Poller()
+    poller.register(s, zmq.POLLIN)
+
+    timeout = 10000
+
+    events = dict(poller.poll(timeout))
+    if s in events and events[s] == zmq.POLLIN:
+        msg = s.recv()
+        msg = json.loads(msg)
+    else:
+        print 'UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout
+        sys.exit(3)
 
     for consumer in msg['consumers']:
         if consumer['name'] == check_consumer:
@@ -38,6 +53,9 @@ try:
                 print 'OK: fedmsg consumer %s backlog value is %i' % (consumer['name'],consumer['backlog'])
                 sys.exit(0)
 
+    # The loop exits the process on a match, so reaching here means no match.
+    print "UNKNOWN: fedmsg consumer %s not found" % check_consumer
+    sys.exit(3)
 except Exception as err:
     print "UNKNOWN:", str(err)
     sys.exit(3)
diff --git a/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py b/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py
index c6ef87234a..630e877410 100644
--- a/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py
+++ b/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py
@@ -12,16 +12,31 @@ try:
     exceptions_warning = int(sys.argv[3])
     exceptions_critical = int(sys.argv[4])
     fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+
     if not os.path.exists(fname):
         print "UNKNOWN - %s does not exist" % fname
         sys.exit(3)
+
     connect_to = "ipc:///%s" % fname
     ctx = zmq.Context()
     s = ctx.socket(zmq.SUB)
     s.connect(connect_to)
     s.setsockopt(zmq.SUBSCRIBE, '')
-    msg = s.recv()
-    msg = json.loads(msg)
+
+    # Poll with a timeout (in milliseconds) so the check reports UNKNOWN
+    # instead of blocking forever in recv() when nothing is published.
+    poller = zmq.Poller()
+    poller.register(s, zmq.POLLIN)
+
+    timeout = 10000
+
+    events = dict(poller.poll(timeout))
+    if s in events and events[s] == zmq.POLLIN:
+        msg = s.recv()
+        msg = json.loads(msg)
+    else:
+        print 'UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout
+        sys.exit(3)
 
     for consumer in msg['consumers']:
         if consumer['name'] == check_consumer:
@@ -35,6 +50,9 @@ try:
                 print 'OK: fedmsg consumer %s exceptions value is %i' % (consumer['name'],consumer['exceptions'])
                 sys.exit(0)
 
+    # The loop exits the process on a match, so reaching here means no match.
+    print "UNKNOWN: fedmsg consumer %s not found" % check_consumer
+    sys.exit(3)
 except Exception as err:
     print "UNKNOWN:", str(err)
     sys.exit(3)