work so far on nagios_server role.
Signed-off-by: Ricky Elrod <codeblock@fedoraproject.org>
This commit is contained in:
parent
4ee5939b13
commit
649108c0c3
118 changed files with 1100 additions and 627 deletions
|
@ -0,0 +1,58 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import zmq
|
||||
|
||||
# Nagios plugin exit codes.
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

# How long to wait for a monitoring message, in milliseconds.
POLL_TIMEOUT_MS = 10000


def _receive_status(fname, timeout):
    """Read one JSON status message from the fedmsg monitoring socket.

    Subscribes to the ZeroMQ IPC endpoint backed by ``fname`` and waits up
    to ``timeout`` milliseconds for a message.

    Returns the decoded JSON message, or None if nothing arrived in time.
    The socket and context are released on every path.
    """
    ctx = zmq.Context()
    sock = ctx.socket(zmq.SUB)
    try:
        sock.connect("ipc:///%s" % fname)
        # Bytes, not text: on Python 3 pyzmq refuses a unicode prefix here.
        sock.setsockopt(zmq.SUBSCRIBE, b'')

        poller = zmq.Poller()
        poller.register(sock, zmq.POLLIN)

        events = dict(poller.poll(timeout))
        if events.get(sock) != zmq.POLLIN:
            return None
        # Decode explicitly so json.loads gets text on every interpreter.
        return json.loads(sock.recv().decode('utf-8'))
    finally:
        # linger=0 so closing never blocks the check on unsent data.
        sock.close(linger=0)
        ctx.term()


def evaluate_backlog(msg, check_consumer, backlog_warning, backlog_critical):
    """Map a consumer's backlog onto a Nagios state.

    ``msg`` is the decoded monitoring message; its ``'consumers'`` entry is
    a list of dicts carrying ``'name'`` and ``'backlog'``.  The first
    consumer whose name equals ``check_consumer`` decides the result.

    Returns an ``(exit_code, status_line)`` tuple.  A backlog of None or an
    unknown consumer yields exit code 3; a backlog strictly above
    ``backlog_critical`` yields 2; strictly above ``backlog_warning``
    yields 1; anything else yields 0.
    """
    for consumer in msg['consumers']:
        if consumer['name'] != check_consumer:
            continue

        name = consumer['name']
        backlog = consumer['backlog']
        if backlog is None:
            return (STATE_UNKNOWN,
                    'ERROR: fedmsg consumer %s is not initialized' % name)
        if backlog > backlog_critical:
            return (STATE_CRITICAL,
                    'CRITICAL: fedmsg consumer %s backlog value is %i'
                    % (name, backlog))
        if backlog > backlog_warning:
            return (STATE_WARNING,
                    'WARNING: fedmsg consumer %s backlog value is %i'
                    % (name, backlog))
        return (STATE_OK,
                'OK: fedmsg consumer %s backlog value is %i' % (name, backlog))

    return (STATE_UNKNOWN,
            "UNKNOWN: fedmsg consumer %s not found" % check_consumer)


def _run_check(argv):
    """Parse ``argv``, query the monitoring socket, return (code, text)."""
    if len(argv) < 5:
        return (STATE_UNKNOWN,
                "UNKNOWN - usage: %s SERVICE CONSUMER WARNING CRITICAL"
                % argv[0])

    service = argv[1]
    check_consumer = argv[2]
    backlog_warning = int(argv[3])
    backlog_critical = int(argv[4])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    if not os.path.exists(fname):
        return STATE_UNKNOWN, "UNKNOWN - %s does not exist" % fname

    msg = _receive_status(fname, POLL_TIMEOUT_MS)
    if msg is None:
        return (STATE_UNKNOWN,
                'UNKNOWN - ZMQ timeout. No message received in %i ms'
                % POLL_TIMEOUT_MS)

    return evaluate_backlog(msg, check_consumer,
                            backlog_warning, backlog_critical)


def main(argv=None):
    """Run the backlog check and print one Nagios status line.

    ``argv`` defaults to ``sys.argv``.  Returns the plugin exit code; any
    unexpected exception is reported as UNKNOWN (3) instead of a traceback.
    """
    if argv is None:
        argv = sys.argv
    try:
        code, text = _run_check(argv)
    except Exception as err:
        code, text = STATE_UNKNOWN, "UNKNOWN: %s" % str(err)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(text)
    return code


if __name__ == '__main__':
    sys.exit(main())
|
|
@ -0,0 +1,54 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import zmq
|
||||
|
||||
# Nagios plugin exit codes.
STATE_OK = 0
STATE_WARNING = 1
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

# How long to wait for a monitoring message, in milliseconds.
POLL_TIMEOUT_MS = 10000


def _receive_status(fname, timeout):
    """Read one JSON status message from the fedmsg monitoring socket.

    Subscribes to the ZeroMQ IPC endpoint backed by ``fname`` and waits up
    to ``timeout`` milliseconds for a message.

    Returns the decoded JSON message, or None if nothing arrived in time.
    The socket and context are released on every path.
    """
    ctx = zmq.Context()
    sock = ctx.socket(zmq.SUB)
    try:
        sock.connect("ipc:///%s" % fname)
        # Bytes, not text: on Python 3 pyzmq refuses a unicode prefix here.
        sock.setsockopt(zmq.SUBSCRIBE, b'')

        poller = zmq.Poller()
        poller.register(sock, zmq.POLLIN)

        events = dict(poller.poll(timeout))
        if events.get(sock) != zmq.POLLIN:
            return None
        # Decode explicitly so json.loads gets text on every interpreter.
        return json.loads(sock.recv().decode('utf-8'))
    finally:
        # linger=0 so closing never blocks the check on unsent data.
        sock.close(linger=0)
        ctx.term()


def evaluate_exceptions(msg, check_consumer,
                        exceptions_warning, exceptions_critical):
    """Map a consumer's exception count onto a Nagios state.

    ``msg`` is the decoded monitoring message; its ``'consumers'`` entry is
    a list of dicts carrying ``'name'`` and ``'exceptions'``.  The first
    consumer whose name equals ``check_consumer`` decides the result.

    Returns an ``(exit_code, status_line)`` tuple: 2 when the count is
    strictly above ``exceptions_critical``, 1 when strictly above
    ``exceptions_warning``, 0 otherwise, and 3 when no consumer matches.
    """
    for consumer in msg['consumers']:
        if consumer['name'] != check_consumer:
            continue

        name = consumer['name']
        count = consumer['exceptions']
        if count > exceptions_critical:
            return (STATE_CRITICAL,
                    'CRITICAL: fedmsg consumer %s exceptions value is %i'
                    % (name, count))
        if count > exceptions_warning:
            return (STATE_WARNING,
                    'WARNING: fedmsg consumer %s exceptions value is %i'
                    % (name, count))
        return (STATE_OK,
                'OK: fedmsg consumer %s exceptions value is %i'
                % (name, count))

    return (STATE_UNKNOWN,
            "UNKNOWN: fedmsg consumers %s not found" % check_consumer)


def _run_check(argv):
    """Parse ``argv``, query the monitoring socket, return (code, text)."""
    if len(argv) < 5:
        return (STATE_UNKNOWN,
                "UNKNOWN - usage: %s SERVICE CONSUMER WARNING CRITICAL"
                % argv[0])

    service = argv[1]
    check_consumer = argv[2]
    exceptions_warning = int(argv[3])
    exceptions_critical = int(argv[4])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    if not os.path.exists(fname):
        return STATE_UNKNOWN, "UNKNOWN - %s does not exist" % fname

    msg = _receive_status(fname, POLL_TIMEOUT_MS)
    if msg is None:
        return (STATE_UNKNOWN,
                'UNKNOWN - ZMQ timeout. No message received in %i ms'
                % POLL_TIMEOUT_MS)

    return evaluate_exceptions(msg, check_consumer,
                               exceptions_warning, exceptions_critical)


def main(argv=None):
    """Run the exceptions check and print one Nagios status line.

    ``argv`` defaults to ``sys.argv``.  Returns the plugin exit code; any
    unexpected exception is reported as UNKNOWN (3) instead of a traceback.
    """
    if argv is None:
        argv = sys.argv
    try:
        code, text = _run_check(argv)
    except Exception as err:
        code, text = STATE_UNKNOWN, "UNKNOWN: %s" % str(err)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(text)
    return code


if __name__ == '__main__':
    sys.exit(main())
|
|
@ -0,0 +1,60 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import zmq
|
||||
|
||||
# Nagios plugin exit codes.
STATE_OK = 0
STATE_CRITICAL = 2
STATE_UNKNOWN = 3

# How long to wait for a monitoring message, in milliseconds.
POLL_TIMEOUT_MS = 10000


def _receive_status(fname, timeout):
    """Read one JSON status message from the fedmsg monitoring socket.

    Subscribes to the ZeroMQ IPC endpoint backed by ``fname`` and waits up
    to ``timeout`` milliseconds for a message.

    Returns the decoded JSON message, or None if nothing arrived in time.
    The socket and context are released on every path.
    """
    ctx = zmq.Context()
    sock = ctx.socket(zmq.SUB)
    try:
        sock.connect("ipc:///%s" % fname)
        # Bytes, not text: on Python 3 pyzmq refuses a unicode prefix here.
        sock.setsockopt(zmq.SUBSCRIBE, b'')

        poller = zmq.Poller()
        poller.register(sock, zmq.POLLIN)

        events = dict(poller.poll(timeout))
        if events.get(sock) != zmq.POLLIN:
            return None
        # Decode explicitly so json.loads gets text on every interpreter.
        return json.loads(sock.recv().decode('utf-8'))
    finally:
        # linger=0 so closing never blocks the check on unsent data.
        sock.close(linger=0)
        ctx.term()


def evaluate_initialized(msg, check_list):
    """Check that every requested plugin is present and initialized.

    ``msg`` is the decoded monitoring message; its ``'consumers'`` and
    ``'producers'`` entries are lists of dicts carrying ``'name'`` and
    ``'initialized'``.  ``check_list`` is the collection of plugin names
    to verify.

    Returns an ``(exit_code, status_line)`` tuple.  Consumers are examined
    before producers; an uninitialized requested plugin yields 2.  A
    requested name that is neither a consumer nor a producer also yields
    2; when several are missing, the first in sorted order is reported so
    the output is stable from run to run.
    """
    for kind in ('consumer', 'producer'):
        for plugin in msg[kind + 's']:
            if plugin['name'] in check_list and not plugin['initialized']:
                return (STATE_CRITICAL,
                        'ERROR: fedmsg %s %s is not initialized'
                        % (kind, plugin['name']))

    # Build the name set once rather than once per requested item.
    installed = set(p['name'] for p in msg['producers'] + msg['consumers'])
    missing = sorted(set(check_list) - installed)
    if missing:
        return (STATE_CRITICAL,
                'ERROR: %s not found among installed plugins' % missing[0])

    return STATE_OK, "OK: fedmsg consumer(s) and producer(s) initialized"


def _run_check(argv):
    """Parse ``argv``, query the monitoring socket, return (code, text)."""
    if len(argv) < 2:
        return (STATE_UNKNOWN,
                "UNKNOWN - usage: %s SERVICE PLUGIN [PLUGIN ...]" % argv[0])

    service = argv[1]
    check_list = frozenset(argv[2:])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    if not check_list:
        return (STATE_UNKNOWN,
                "UNKNOWN - empty list of fedmsg consumers and producers "
                "to check")

    if not os.path.exists(fname):
        return STATE_UNKNOWN, "UNKNOWN - %s does not exist" % fname

    msg = _receive_status(fname, POLL_TIMEOUT_MS)
    if msg is None:
        return (STATE_UNKNOWN,
                'UNKNOWN - ZMQ timeout. No message received in %i ms'
                % POLL_TIMEOUT_MS)

    return evaluate_initialized(msg, check_list)


def main(argv=None):
    """Run the initialization check and print one Nagios status line.

    ``argv`` defaults to ``sys.argv``.  Returns the plugin exit code; any
    unexpected exception is reported as UNKNOWN (3) instead of a traceback.
    """
    if argv is None:
        argv = sys.argv
    try:
        code, text = _run_check(argv)
    except Exception as err:
        code, text = STATE_UNKNOWN, "UNKNOWN: %s" % str(err)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(text)
    return code


if __name__ == '__main__':
    sys.exit(main())
|
|
@ -49,7 +49,7 @@ CRITICAL)
|
|||
2)
|
||||
echo -n "Restarting HTTP service (3rd soft critical state)..."
|
||||
# Call the init script to restart the HTTPD server
|
||||
echo "#fedora-noc $hostalias - Attempting to reload httpd. $servicedesc is $servicestate (2nd check)" | /usr/bin/nc -w 1 value03 5050
|
||||
echo "#fedora-noc $hostalias - Attempting to reload httpd. $servicedesc is $servicestate (2nd check)" | /usr/bin/nc -w 1 value01 5050
|
||||
/usr/lib/nagios/plugins/check_nrpe -H $remotehost -c service_httpd_reload
|
||||
;;
|
||||
esac
|
||||
|
@ -62,7 +62,7 @@ CRITICAL)
|
|||
# point (unless you disabled notifications for this service)
|
||||
HARD)
|
||||
echo -n "Restarting HTTP service..."
|
||||
echo "#fedora-noc $hostalias - Attempting to restart httpd. $servicedesc is $servicestate" | /usr/bin/nc -w 1 value03 5050
|
||||
echo "#fedora-noc $hostalias - Attempting to restart httpd. $servicedesc is $servicestate" | /usr/bin/nc -w 1 value01 5050
|
||||
# Call the init script to restart the HTTPD server
|
||||
/usr/lib/nagios/plugins/check_nrpe -H $remotehost -c service_httpd_restart
|
||||
;;
|
||||
|
|
|
@ -49,7 +49,7 @@ CRITICAL)
|
|||
2)
|
||||
echo -n "Restarting rsyslog service (3rd soft critical state)..."
|
||||
# Call the init script to restart the rsyslog server
|
||||
echo "#fedora-noc $hostalias - Attempting to reload rsyslog. $servicedesc is $servicestate (2nd check)" | /usr/bin/nc -w 1 value03 5050
|
||||
echo "#fedora-noc $hostalias - Attempting to reload rsyslog. $servicedesc is $servicestate (2nd check)" | /usr/bin/nc -w 1 value01 5050
|
||||
/usr/lib/nagios/plugins/check_nrpe -H $remotehost -c service_rsyslog_reload
|
||||
;;
|
||||
esac
|
||||
|
@ -62,7 +62,7 @@ CRITICAL)
|
|||
# point (unless you disabled notifications for this service)
|
||||
HARD)
|
||||
echo -n "Restarting rsyslog service..."
|
||||
echo "#fedora-noc $hostalias - Attempting to restart rsyslog. $servicedesc is $servicestate" | /usr/bin/nc -w 1 value03 5050
|
||||
echo "#fedora-noc $hostalias - Attempting to restart rsyslog. $servicedesc is $servicestate" | /usr/bin/nc -w 1 value01 5050
|
||||
# Call the init script to restart the rsyslog server
|
||||
/usr/lib/nagios/plugins/check_nrpe -H $remotehost -c service_rsyslog_restart
|
||||
;;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue