From c96195506a772d1d1f857c46c45967d6064a1a77 Mon Sep 17 00:00:00 2001
From: Ralph Bean
Date: Mon, 12 May 2014 16:43:09 +0000
Subject: [PATCH] Introduce new fedmsg nagios checks from Janez Nemanic.

See https://fedorahosted.org/fedora-infrastructure/ticket/4044
---
 .../scripts/check_fedmsg_consumer_backlog.py | 58 +++++++++++++++++++
 .../check_fedmsg_consumer_exceptions.py      | 55 ++++++++++++++++++
 .../check_fedmsg_producers_consumers.py      | 46 +++++++++++++++
 .../templates/check_fedmsg_consumers.cfg.j2  | 30 ++++++++++
 4 files changed, 189 insertions(+)
 create mode 100644 roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py
 create mode 100644 roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py
 create mode 100644 roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py
 create mode 100644 roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2

diff --git a/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py b/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py
new file mode 100644
index 0000000000..e37327ee86
--- /dev/null
+++ b/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+""" Nagios check for the backlog of a single fedmsg consumer.
+
+Usage: check_fedmsg_consumer_backlog.py SERVICE CONSUMER WARNING CRITICAL
+
+Reads one JSON message from /var/run/fedmsg/monitoring-SERVICE.socket and
+compares the backlog of CONSUMER with the two integer thresholds.  Exits
+with 0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN).
+"""
+
+import json
+import os
+import socket
+import sys
+import zmq
+
+try:
+    service = sys.argv[1]
+    check_consumer = sys.argv[2]
+    # argv holds strings, and CPython 2 never ranks an int above a str, so
+    # without int() neither threshold comparison below could ever be true.
+    backlog_warning = int(sys.argv[3])
+    backlog_critical = int(sys.argv[4])
+    fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+    if not os.path.exists(fname):
+        print "UNKNOWN - %s does not exist" % fname
+        sys.exit(3)
+    connect_to = "ipc:///%s" % fname
+    ctx = zmq.Context()
+    s = ctx.socket(zmq.SUB)
+    s.connect(connect_to)
+    s.setsockopt(zmq.SUBSCRIBE, '')
+    msg = s.recv()
+    msg = json.loads(msg)
+
+    for consumer in msg['consumers']:
+        if consumer['name'] == check_consumer:
+            if consumer['backlog'] is None:
+                print 'ERROR: fedmsg consumer %s is not initialized' % consumer['name']
+                sys.exit(3)
+            elif consumer['backlog'] > backlog_critical:
+                print 'CRITICAL: fedmsg consumer %s backlog value is %i' % (consumer['name'],consumer['backlog'])
+                sys.exit(2)
+            elif consumer['backlog'] > backlog_warning:
+                print 'WARNING: fedmsg consumer %s backlog value is %i' % (consumer['name'],consumer['backlog'])
+                sys.exit(1)
+            else:
+                print 'OK: fedmsg consumer %s backlog value is %i' % (consumer['name'],consumer['backlog'])
+                sys.exit(0)
+
+    # No entry matched CONSUMER: report that instead of falling through and
+    # exiting 0 without printing anything.
+    print 'UNKNOWN: fedmsg consumer %s not found' % check_consumer
+    sys.exit(3)
+
+except Exception as err:
+    print "UNKNOWN:", str(err)
+    sys.exit(3)
diff --git a/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py b/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py
new file mode 100644
index 0000000000..6890063a21
--- /dev/null
+++ b/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+""" Nagios check for the exception count of a single fedmsg consumer.
+
+Usage: check_fedmsg_consumer_exceptions.py SERVICE CONSUMER WARNING CRITICAL
+
+Reads one JSON message from /var/run/fedmsg/monitoring-SERVICE.socket and
+compares the exceptions value of CONSUMER with the two integer thresholds.
+Exits with 0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN).
+"""
+
+import json
+import os
+import socket
+import sys
+import zmq
+
+try:
+    service = sys.argv[1]
+    check_consumer = sys.argv[2]
+    # argv holds strings, and CPython 2 never ranks an int above a str, so
+    # without int() neither threshold comparison below could ever be true.
+    exceptions_warning = int(sys.argv[3])
+    exceptions_critical = int(sys.argv[4])
+    fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+    if not os.path.exists(fname):
+        print "UNKNOWN - %s does not exist" % fname
+        sys.exit(3)
+    connect_to = "ipc:///%s" % fname
+    ctx = zmq.Context()
+    s = ctx.socket(zmq.SUB)
+    s.connect(connect_to)
+    s.setsockopt(zmq.SUBSCRIBE, '')
+    msg = s.recv()
+    msg = json.loads(msg)
+
+    for consumer in msg['consumers']:
+        if consumer['name'] == check_consumer:
+            if consumer['exceptions'] > exceptions_critical:
+                print 'CRITICAL: fedmsg consumer %s exceptions value is %i' % (consumer['name'],consumer['exceptions'])
+                sys.exit(2)
+            elif consumer['exceptions'] > exceptions_warning:
+                print 'WARNING: fedmsg consumer %s exceptions value is %i' % (consumer['name'],consumer['exceptions'])
+                sys.exit(1)
+            else:
+                print 'OK: fedmsg consumer %s exceptions value is %i' % (consumer['name'],consumer['exceptions'])
+                sys.exit(0)
+
+    # No entry matched CONSUMER: report that instead of falling through and
+    # exiting 0 without printing anything.
+    print 'UNKNOWN: fedmsg consumer %s not found' % check_consumer
+    sys.exit(3)
+
+except Exception as err:
+    print "UNKNOWN:", str(err)
+    sys.exit(3)
diff --git 
a/roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py b/roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py
new file mode 100644
index 0000000000..918ced6914
--- /dev/null
+++ b/roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+""" Nagios check: exit 2 when a listed fedmsg consumer or producer is uninitialized. """
+
+import json
+import os
+import socket
+import sys
+import zmq
+
+# Key of the status message paired with the line printed for each of its
+# entries that is named on the command line but not yet initialized.
+REPORTS = (
+    ('consumers', 'ERROR: fedmsg consumer %s is not initialized'),
+    ('producers', 'ERROR: fedmsg producer %s is not initialized'),
+)
+
+try:
+    service = sys.argv[1]
+    wanted = frozenset(sys.argv[2:])
+    if not wanted:
+        print "UNKNOWN - empty list of fedmsg consumers and producers to check"
+        sys.exit(3)
+    socket_path = '/var/run/fedmsg/monitoring-%s.socket' % service
+    if not os.path.exists(socket_path):
+        print "UNKNOWN - %s does not exist" % socket_path
+        sys.exit(3)
+    context = zmq.Context()
+    subscriber = context.socket(zmq.SUB)
+    subscriber.connect("ipc:///%s" % socket_path)
+    subscriber.setsockopt(zmq.SUBSCRIBE, '')
+    status = json.loads(subscriber.recv())
+
+    problems = 0
+    for key, template in REPORTS:
+        for entry in status[key]:
+            if entry['name'] in wanted and not entry['initialized']:
+                print template % entry['name']
+                problems += 1
+    if problems:
+        sys.exit(2)
+    print "OK: fedmsg consumer(s) and producer(s) initialized"
+    sys.exit(0)
+
+except Exception as err:
+    print "UNKNOWN:", str(err)
+    sys.exit(3)
diff --git a/roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2 b/roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2
new file mode 100644
index 0000000000..51a2cc68fa
--- /dev/null
+++ b/roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2
@@ -0,0 +1,30 @@
+# Fedmsg checks for consumers and producers
+command[check_fedmsg_cp_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub DatanommerConsumer 
MonitoringProducer +command[check_fedmsg_cp_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer +command[check_fedmsg_cp_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-gateway GatewayConsumer MonitoringProducer +command[check_fedmsg_cp_app]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer +command[check_fedmsg_cp_value]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-irc IRCBotConsumer MonitoringProducer +command[check_fedmsg_cp_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub GenACLsConsumer MonitoringProducer +command[check_fedmsg_cp_summershum]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub SummerShumConsumer MonitoringProducer +command[check_fedmsg_cp_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FedoraBadgesConsumer MonitoringProducer +command[check_fedmsg_cp_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FMNConsumer DigestProducer ConfirmationProducer MonitoringProducer + +command[check_fedmsg_cexceptions_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub DatanommerConsumer 1 10 +command[check_fedmsg_cexceptions_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10 +command[check_fedmsg_cexceptions_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-gateway GatewayConsumer 1 10 +command[check_fedmsg_cexceptions_app]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10 +command[check_fedmsg_cexceptions_value]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-irc IRCBotConsumer 1 10 
+command[check_fedmsg_cexceptions_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub GenACLsConsumer 1 10 +command[check_fedmsg_cexceptions_summershum]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub SummerShumConsumer 1 10 +command[check_fedmsg_cexceptions_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FedoraBadgesConsumer 1 10 +command[check_fedmsg_cexceptions_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FMNConsumer 1 10 + +command[check_fedmsg_cbacklog_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub DatanommerConsumer 10 50 +command[check_fedmsg_cbacklog_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50 +command[check_fedmsg_cbacklog_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-gateway GatewayConsumer 10 50 +command[check_fedmsg_cbacklog_app]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50 +command[check_fedmsg_cbacklog_value]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-irc IRCBotConsumer 10 50 +command[check_fedmsg_cbacklog_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub GenACLsConsumer 10 50 +command[check_fedmsg_cbacklog_summershum]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub SummerShumConsumer 10 50 +command[check_fedmsg_cbacklog_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FedoraBadgesConsumer 10 50 +command[check_fedmsg_cbacklog_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FMNConsumer 10 50