work so far on nagios_server role.

Signed-off-by: Ricky Elrod <codeblock@fedoraproject.org>
This commit is contained in:
Rick Elrod 2014-07-07 17:09:38 +00:00
parent 4ee5939b13
commit 649108c0c3
118 changed files with 1100 additions and 627 deletions

View file

@ -0,0 +1,58 @@
#!/usr/bin/env python
import json
import os
import socket
import sys
import zmq
def evaluate_backlog(consumers, check_consumer, backlog_warning,
                     backlog_critical):
    """Map one consumer's backlog onto an exit code and a status line.

    ``consumers`` is the ``consumers`` list taken from the decoded
    monitoring message; every entry is read through its ``name`` and
    ``backlog`` keys.  Only the first entry whose name equals
    ``check_consumer`` is evaluated.

    Returns an ``(exit_code, message)`` tuple:

    * ``3`` -- the backlog is ``None`` (consumer not initialized), or no
      entry carries the requested name
    * ``2`` -- backlog is greater than ``backlog_critical``
    * ``1`` -- backlog is greater than ``backlog_warning``
    * ``0`` -- anything else
    """
    for consumer in consumers:
        if consumer['name'] != check_consumer:
            continue

        name = consumer['name']
        backlog = consumer['backlog']
        if backlog is None:
            return 3, 'ERROR: fedmsg consumer %s is not initialized' % name
        if backlog > backlog_critical:
            return 2, ('CRITICAL: fedmsg consumer %s backlog value is %i'
                       % (name, backlog))
        if backlog > backlog_warning:
            return 1, ('WARNING: fedmsg consumer %s backlog value is %i'
                       % (name, backlog))
        return 0, 'OK: fedmsg consumer %s backlog value is %i' % (name, backlog)

    return 3, "UNKNOWN: fedmsg consumer %s not found" % check_consumer


def main(argv):
    """Run the backlog check and exit with the resulting status code.

    ``argv`` is ``[program, service, consumer, warning, critical]``.  The
    status line is printed to stdout and the process always terminates via
    ``sys.exit``; any unexpected exception is reported as UNKNOWN (exit 3).
    """
    try:
        if len(argv) < 5:
            # Without this guard a missing argument is reported as the
            # unhelpful "UNKNOWN: list index out of range".
            print("UNKNOWN - usage: %s SERVICE CONSUMER WARNING CRITICAL"
                  % argv[0])
            sys.exit(3)

        service = argv[1]
        check_consumer = argv[2]
        backlog_warning = int(argv[3])
        backlog_critical = int(argv[4])
        fname = '/var/run/fedmsg/monitoring-%s.socket' % service

        if not os.path.exists(fname):
            print("UNKNOWN - %s does not exist" % fname)
            sys.exit(3)

        connect_to = "ipc:///%s" % fname
        ctx = zmq.Context()
        s = ctx.socket(zmq.SUB)
        s.connect(connect_to)
        # Empty prefix subscribes to every message; a bytes literal is
        # accepted on both Python 2 and Python 3.
        s.setsockopt(zmq.SUBSCRIBE, b'')

        poller = zmq.Poller()
        poller.register(s, zmq.POLLIN)

        timeout = 10000  # milliseconds to wait for one monitoring message

        events = dict(poller.poll(timeout))
        if s in events and events[s] == zmq.POLLIN:
            msg = json.loads(s.recv().decode('utf-8'))
        else:
            print('UNKNOWN - ZMQ timeout. No message received in %i ms'
                  % timeout)
            sys.exit(3)

        code, message = evaluate_backlog(
            msg['consumers'], check_consumer, backlog_warning,
            backlog_critical)
        print(message)
        sys.exit(code)
    except Exception as err:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the deliberate exits above pass straight through this handler.
        print("UNKNOWN: %s" % str(err))
        sys.exit(3)


if __name__ == '__main__':
    main(sys.argv)

View file

@ -0,0 +1,54 @@
#!/usr/bin/env python
import json
import os
import socket
import sys
import zmq
def evaluate_exceptions(consumers, check_consumer, exceptions_warning,
                        exceptions_critical):
    """Map one consumer's exception count onto an exit code and status line.

    ``consumers`` is the ``consumers`` list taken from the decoded
    monitoring message; every entry is read through its ``name`` and
    ``exceptions`` keys.  Only the first entry whose name equals
    ``check_consumer`` is evaluated.

    Returns an ``(exit_code, message)`` tuple:

    * ``2`` -- exceptions is greater than ``exceptions_critical``
    * ``1`` -- exceptions is greater than ``exceptions_warning``
    * ``0`` -- anything else for a matching consumer
    * ``3`` -- no entry carries the requested name
    """
    for consumer in consumers:
        if consumer['name'] != check_consumer:
            continue

        name = consumer['name']
        count = consumer['exceptions']
        if count > exceptions_critical:
            return 2, ('CRITICAL: fedmsg consumer %s exceptions value is %i'
                       % (name, count))
        if count > exceptions_warning:
            return 1, ('WARNING: fedmsg consumer %s exceptions value is %i'
                       % (name, count))
        return 0, ('OK: fedmsg consumer %s exceptions value is %i'
                   % (name, count))

    # A single consumer is looked up, so the message uses the singular
    # (the previous wording said "consumers").
    return 3, "UNKNOWN: fedmsg consumer %s not found" % check_consumer


def main(argv):
    """Run the exceptions check and exit with the resulting status code.

    ``argv`` is ``[program, service, consumer, warning, critical]``.  The
    status line is printed to stdout and the process always terminates via
    ``sys.exit``; any unexpected exception is reported as UNKNOWN (exit 3).
    """
    try:
        if len(argv) < 5:
            # Without this guard a missing argument is reported as the
            # unhelpful "UNKNOWN: list index out of range".
            print("UNKNOWN - usage: %s SERVICE CONSUMER WARNING CRITICAL"
                  % argv[0])
            sys.exit(3)

        service = argv[1]
        check_consumer = argv[2]
        exceptions_warning = int(argv[3])
        exceptions_critical = int(argv[4])
        fname = '/var/run/fedmsg/monitoring-%s.socket' % service

        if not os.path.exists(fname):
            print("UNKNOWN - %s does not exist" % fname)
            sys.exit(3)

        connect_to = "ipc:///%s" % fname
        ctx = zmq.Context()
        s = ctx.socket(zmq.SUB)
        s.connect(connect_to)
        # Empty prefix subscribes to every message; a bytes literal is
        # accepted on both Python 2 and Python 3.
        s.setsockopt(zmq.SUBSCRIBE, b'')

        poller = zmq.Poller()
        poller.register(s, zmq.POLLIN)

        timeout = 10000  # milliseconds to wait for one monitoring message

        events = dict(poller.poll(timeout))
        if s in events and events[s] == zmq.POLLIN:
            msg = json.loads(s.recv().decode('utf-8'))
        else:
            print('UNKNOWN - ZMQ timeout. No message received in %i ms'
                  % timeout)
            sys.exit(3)

        code, message = evaluate_exceptions(
            msg['consumers'], check_consumer, exceptions_warning,
            exceptions_critical)
        print(message)
        sys.exit(code)
    except Exception as err:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the deliberate exits above pass straight through this handler.
        print("UNKNOWN: %s" % str(err))
        sys.exit(3)


if __name__ == '__main__':
    main(sys.argv)

View file

@ -0,0 +1,60 @@
#!/usr/bin/env python
import json
import os
import socket
import sys
import zmq
def evaluate_initialized(msg, check_list):
    """Check that every requested plugin exists and is initialized.

    ``msg`` is the decoded monitoring message; its ``consumers`` and
    ``producers`` lists are read, and each entry is accessed through its
    ``name`` and ``initialized`` keys.  ``check_list`` is the sequence of
    plugin names to verify.

    Returns an ``(exit_code, message)`` tuple.  Checks run in this order and
    the first failure wins:

    1. a requested consumer that is not initialized  -> ``2``
    2. a requested producer that is not initialized  -> ``2``
    3. a requested name absent from both lists       -> ``2``

    If nothing fails the result is ``0`` with the OK message.
    """
    wanted = frozenset(check_list)

    for consumer in msg['consumers']:
        if consumer['name'] in wanted and not consumer['initialized']:
            return 2, ('ERROR: fedmsg consumer %s is not initialized'
                       % consumer['name'])

    for producer in msg['producers']:
        if producer['name'] in wanted and not producer['initialized']:
            return 2, ('ERROR: fedmsg producer %s is not initialized'
                       % producer['name'])

    # Build the lookup once instead of once per requested item.
    known = set(p['name'] for p in msg['producers'] + msg['consumers'])
    # Walk the names in the order they were given so that the reported
    # missing plugin is deterministic.
    for item in check_list:
        if item not in known:
            return 2, 'ERROR: %s not found among installed plugins' % item

    return 0, "OK: fedmsg consumer(s) and producer(s) initialized"


def main(argv):
    """Run the initialization check and exit with the resulting status code.

    ``argv`` is ``[program, service, name, name, ...]``.  The status line is
    printed to stdout and the process always terminates via ``sys.exit``;
    any unexpected exception is reported as UNKNOWN (exit 3).
    """
    try:
        if len(argv) < 2:
            # Without this guard a missing service name is reported as the
            # unhelpful "UNKNOWN: list index out of range".
            print("UNKNOWN - usage: %s SERVICE NAME [NAME ...]" % argv[0])
            sys.exit(3)

        service = argv[1]
        check_list = argv[2:]
        fname = '/var/run/fedmsg/monitoring-%s.socket' % service

        if not check_list:
            print("UNKNOWN - empty list of fedmsg consumers and producers "
                  "to check")
            sys.exit(3)

        if not os.path.exists(fname):
            print("UNKNOWN - %s does not exist" % fname)
            sys.exit(3)

        connect_to = "ipc:///%s" % fname
        ctx = zmq.Context()
        s = ctx.socket(zmq.SUB)
        s.connect(connect_to)
        # Empty prefix subscribes to every message; a bytes literal is
        # accepted on both Python 2 and Python 3.
        s.setsockopt(zmq.SUBSCRIBE, b'')

        poller = zmq.Poller()
        poller.register(s, zmq.POLLIN)

        timeout = 10000  # milliseconds to wait for one monitoring message

        events = dict(poller.poll(timeout))
        if s in events and events[s] == zmq.POLLIN:
            msg = json.loads(s.recv().decode('utf-8'))
        else:
            print('UNKNOWN - ZMQ timeout. No message received in %i ms'
                  % timeout)
            sys.exit(3)

        code, message = evaluate_initialized(msg, check_list)
        print(message)
        sys.exit(code)
    except Exception as err:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the deliberate exits above pass straight through this handler.
        print("UNKNOWN: %s" % str(err))
        sys.exit(3)


if __name__ == '__main__':
    main(sys.argv)

View file

@ -49,7 +49,7 @@ CRITICAL)
2)
echo -n "Restarting HTTP service (3rd soft critical state)..."
# Call the init script to restart the HTTPD server
echo "#fedora-noc $hostalias - Attempting to reload httpd. $servicedesc is $servicestate (2nd check)" | /usr/bin/nc -w 1 value03 5050
echo "#fedora-noc $hostalias - Attempting to reload httpd. $servicedesc is $servicestate (2nd check)" | /usr/bin/nc -w 1 value01 5050
/usr/lib/nagios/plugins/check_nrpe -H $remotehost -c service_httpd_reload
;;
esac
@ -62,7 +62,7 @@ CRITICAL)
# point (unless you disabled notifications for this service)
HARD)
echo -n "Restarting HTTP service..."
echo "#fedora-noc $hostalias - Attempting to restart httpd. $servicedesc is $servicestate" | /usr/bin/nc -w 1 value03 5050
echo "#fedora-noc $hostalias - Attempting to restart httpd. $servicedesc is $servicestate" | /usr/bin/nc -w 1 value01 5050
# Call the init script to restart the HTTPD server
/usr/lib/nagios/plugins/check_nrpe -H $remotehost -c service_httpd_restart
;;

View file

@ -49,7 +49,7 @@ CRITICAL)
2)
echo -n "Restarting rsyslog service (3rd soft critical state)..."
# Call the init script to restart the rsyslog server
echo "#fedora-noc $hostalias - Attempting to reload rsyslog. $servicedesc is $servicestate (2nd check)" | /usr/bin/nc -w 1 value03 5050
echo "#fedora-noc $hostalias - Attempting to reload rsyslog. $servicedesc is $servicestate (2nd check)" | /usr/bin/nc -w 1 value01 5050
/usr/lib/nagios/plugins/check_nrpe -H $remotehost -c service_rsyslog_reload
;;
esac
@ -62,7 +62,7 @@ CRITICAL)
# point (unless you disabled notifications for this service)
HARD)
echo -n "Restarting rsyslog service..."
echo "#fedora-noc $hostalias - Attempting to restart rsyslog. $servicedesc is $servicestate" | /usr/bin/nc -w 1 value03 5050
echo "#fedora-noc $hostalias - Attempting to restart rsyslog. $servicedesc is $servicestate" | /usr/bin/nc -w 1 value01 5050
# Ask NRPE on the remote host to restart the rsyslog service
/usr/lib/nagios/plugins/check_nrpe -H $remotehost -c service_rsyslog_restart
;;