From 7b84ea9663d61eb9f45da273230c712e7d4e5e63 Mon Sep 17 00:00:00 2001 From: Ralph Bean Date: Mon, 19 May 2014 14:26:38 +0000 Subject: [PATCH] Also, improve the other two new fedmsg checks. --- .../scripts/check_fedmsg_consumer_backlog.py | 19 +++++++++++++++++-- .../check_fedmsg_consumer_exceptions.py | 18 ++++++++++++++++-- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py b/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py index 75ff4333fe..0b9da670ea 100644 --- a/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py +++ b/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py @@ -12,16 +12,29 @@ try: backlog_warning = int(sys.argv[3]) backlog_critical = int(sys.argv[4]) fname = '/var/run/fedmsg/monitoring-%s.socket' % service + if not os.path.exists(fname): print "UNKNOWN - %s does not exist" % fname sys.exit(3) + connect_to = "ipc:///%s" % fname ctx = zmq.Context() s = ctx.socket(zmq.SUB) s.connect(connect_to) s.setsockopt(zmq.SUBSCRIBE, '') - msg = s.recv() - msg = json.loads(msg) + + poller = zmq.Poller() + poller.register(s, zmq.POLLIN) + + timeout = 10000 + + events = dict(poller.poll(timeout)) + if s in events and events[s] == zmq.POLLIN: + msg = s.recv() + msg = json.loads(msg) + else: + print 'UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout + sys.exit(3) for consumer in msg['consumers']: if consumer['name'] == check_consumer: @@ -38,6 +51,8 @@ try: print 'OK: fedmsg consumer %s backlog value is %i' % (consumer['name'],consumer['backlog']) sys.exit(0) + print "UNKNOWN: fedmsg consumer %s not found" % check_consumer + sys.exit(3) except Exception as err: print "UNKNOWN:", str(err) sys.exit(3) diff --git a/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py b/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py index c6ef87234a..630e877410 100644 --- a/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py +++ b/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py @@ -12,16 +12,28 @@ try: exceptions_warning = int(sys.argv[3]) exceptions_critical = int(sys.argv[4]) fname = '/var/run/fedmsg/monitoring-%s.socket' % service + if not os.path.exists(fname): print "UNKNOWN - %s does not exist" % fname sys.exit(3) + connect_to = "ipc:///%s" % fname ctx = zmq.Context() s = ctx.socket(zmq.SUB) s.connect(connect_to) s.setsockopt(zmq.SUBSCRIBE, '') - msg = s.recv() - msg = json.loads(msg) + poller = zmq.Poller() + poller.register(s, zmq.POLLIN) + + timeout = 10000 + + events = dict(poller.poll(timeout)) + if s in events and events[s] == zmq.POLLIN: + msg = s.recv() + msg = json.loads(msg) + else: + print 'UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout + sys.exit(3) for consumer in msg['consumers']: if consumer['name'] == check_consumer: @@ -35,6 +47,8 @@ try: print 'OK: fedmsg consumer %s exceptions value is %i' % (consumer['name'],consumer['exceptions']) sys.exit(0) + print "UNKNOWN: fedmsg consumers %s not found" % check_consumer + sys.exit(3) except Exception as err: print "UNKNOWN:", str(err) sys.exit(3)