put in the first run at new nagios configs
This commit is contained in:
parent
a1957d29d4
commit
8cf72ff116
310 changed files with 13255 additions and 26 deletions
|
@ -1264,6 +1264,12 @@ docker-candidate-registry01.phx2.fedoraproject.org
|
|||
docker-registry01.stg.phx2.fedoraproject.org
|
||||
docker-candidate-registry01.stg.phx2.fedoraproject.org
|
||||
|
||||
[webservers:children]
|
||||
proxies
|
||||
ipsilon
|
||||
ipa
|
||||
fas
|
||||
|
||||
#
|
||||
# Hosts in this group have zombie processes for various reasons
|
||||
# and we want to not alert on those, so to the client nrpe.conf uses
|
||||
|
@ -1276,3 +1282,4 @@ pkgs02.phx2.fedoraproject.org
|
|||
fed-cloud09.cloud.fedoraproject.org
|
||||
# Ansible from time to time in large runs has zombie threads
|
||||
batcave01.phx2.fedoraproject.org
|
||||
|
||||
|
|
|
@ -35,4 +35,3 @@ define contact{
|
|||
email 9178159801@vtext.com
|
||||
pager 9178159801@vtext.com
|
||||
}
|
||||
|
||||
|
|
|
@ -10,29 +10,29 @@ define contact{
|
|||
email nick@bebout.net
|
||||
}
|
||||
|
||||
define contact{
|
||||
contact_name nb-emergency
|
||||
alias Nick Bebout
|
||||
service_notification_period never
|
||||
host_notification_period never
|
||||
service_notification_options w,u,c,r
|
||||
host_notification_options d,u,r
|
||||
service_notification_commands notify-by-epager
|
||||
host_notification_commands host-notify-by-epager
|
||||
email nb5@txt.att.net
|
||||
pager nb5@txt.att.net
|
||||
}
|
||||
#define contact{
|
||||
# contact_name nb-emergency
|
||||
# alias Nick Bebout
|
||||
# service_notification_period never
|
||||
# host_notification_period never
|
||||
# service_notification_options w,u,c,r
|
||||
# host_notification_options d,u,r
|
||||
# service_notification_commands notify-by-epager
|
||||
# host_notification_commands host-notify-by-epager
|
||||
# email nb5@txt.att.net
|
||||
# pager nb5@txt.att.net
|
||||
#}
|
||||
|
||||
define contact{
|
||||
contact_name nbp
|
||||
alias Nick Bebout
|
||||
service_notification_period never
|
||||
host_notification_period never
|
||||
service_notification_options w,u,c,r
|
||||
host_notification_options d,u,r
|
||||
service_notification_commands notify-by-epager
|
||||
host_notification_commands host-notify-by-epager
|
||||
email nb5@txt.att.net
|
||||
pager nb5@txt.att.net
|
||||
}
|
||||
#define contact{
|
||||
# contact_name nbp
|
||||
# alias Nick Bebout
|
||||
# service_notification_period never
|
||||
# host_notification_period never
|
||||
# service_notification_options w,u,c,r
|
||||
# host_notification_options d,u,r
|
||||
# service_notification_commands notify-by-epager
|
||||
# host_notification_commands host-notify-by-epager
|
||||
# email nb5@txt.att.net
|
||||
# pager nb5@txt.att.net
|
||||
#}
|
||||
|
||||
|
|
|
@ -11,7 +11,19 @@
|
|||
#}
|
||||
#
|
||||
#define contact{
|
||||
# contact_name skvidalp
|
||||
# contact_name skvidal_xmpp
|
||||
# alias Seth Vidal
|
||||
# service_notification_period 24x7
|
||||
# host_notification_period 24x7
|
||||
# service_notification_options w,u,c,r
|
||||
# host_notification_options d,u,r
|
||||
# service_notification_commands notify-by-xmpp
|
||||
# host_notification_commands host-notify-by-xmpp
|
||||
# email skvidal@jabber.org
|
||||
#}
|
||||
#
|
||||
#define contact{
|
||||
# contact_name skvidal-emergency
|
||||
# alias Seth Vidal
|
||||
# service_notification_period 24x7
|
||||
# host_notification_period 24x7
|
||||
|
@ -20,5 +32,17 @@
|
|||
# service_notification_commands notify-by-epager
|
||||
# host_notification_commands host-notify-by-epager
|
||||
# email page-seth-vidal@sethdot.org
|
||||
#}
|
||||
#
|
||||
#define contact{
|
||||
# contact_name skvidalp
|
||||
# alias Seth Vidal
|
||||
# service_notification_period 16x7
|
||||
# host_notification_period 16x7
|
||||
# service_notification_options w,u,c,r
|
||||
# host_notification_options d,u,r
|
||||
# service_notification_commands notify-by-epager
|
||||
# host_notification_commands host-notify-by-epager
|
||||
# email page-seth-vidal@sethdot.org
|
||||
# pager page-seth-vidal@sethdot.org
|
||||
#}
|
||||
|
|
36
roles/nagios_client/README.rst
Normal file
36
roles/nagios_client/README.rst
Normal file
|
@ -0,0 +1,36 @@
|
|||
===================================
|
||||
Nagios 4 Configuration for Fedora
|
||||
===================================
|
||||
|
||||
The Fedora Infrastructure Nagios is built on a set of configurations
|
||||
originally written for Nagios 2 and then upgraded over time to Nagios
|
||||
3 and then 4.08. With additional changes made in the 4.2 series of
|
||||
Nagios this needed a better rewrite as various parts came from
|
||||
pre-Puppet setups, with various Puppet modules later added on top.
|
||||
|
||||
In order to get this rewrite done, we will use as much of the original
|
||||
layout of the Fedora ansible nagios module as possible, but with rewrites to better
|
||||
match current Nagios configurations so that it can be maintained.
|
||||
|
||||
Role directory layout
|
||||
=====================
|
||||
The original layout branched out from
|
||||
|
||||
roles/nagios/client/
|
||||
roles/nagios/server/
|
||||
|
||||
with the usual trees below each. This breaks Ansible best practices
|
||||
and how most new modules are set up so the rewrite uses:
|
||||
|
||||
roles/nagios_client/
|
||||
roles/nagios_server/
|
||||
|
||||
=====================
|
||||
Nagios Client Files
|
||||
=====================
|
||||
|
||||
For the most part the Nagios Client files seem to work from the
|
||||
original layout to the new site. Changes will only need to be made to
|
||||
playbooks for the initial changes.
|
||||
|
||||
|
72
roles/nagios_client/files/scripts/check_datanommer_timesince.py
Executable file
72
roles/nagios_client/files/scripts/check_datanommer_timesince.py
Executable file
|
@ -0,0 +1,72 @@
|
|||
#!/usr/bin/env python
|
||||
""" NRPE check for datanommer/fedmsg health.
|
||||
Given a category like 'bodhi', 'buildsys', or 'git', return an error if
|
||||
datanommer hasn't seen a message of that type in such and such time.
|
||||
You can alternatively provide a 'topic' which might look like
|
||||
org.fedoraproject.prod.bodhi.update.comment.
|
||||
|
||||
Requires: python-dateutil
|
||||
|
||||
Usage:
|
||||
|
||||
$ check_datanommer_timesince CATEGORY WARNING_THRESH CRITICAL_THRESH
|
||||
|
||||
:Author: Ralph Bean <rbean@redhat.com>
|
||||
|
||||
"""
|
||||
|
||||
import dateutil.relativedelta
|
||||
import subprocess
|
||||
import sys
|
||||
import json
|
||||
|
||||
|
||||
def query_timesince(identifier):
    """Return the seconds since datanommer last saw a matching message.

    ``identifier`` is treated as a topic when it contains a '.', otherwise
    as a category.  The value is read from the JSON list that
    ``datanommer-latest --timesince`` prints after its ``INFO] `` log prefix.

    Raises ValueError when the expected ``INFO] `` marker is missing from
    the command output; the message includes whatever the command wrote to
    stderr so the Nagios UNKNOWN line explains the failure.
    """
    # If it has a '.', then assume it is a topic.
    selector = '--topic' if '.' in identifier else '--category'
    # Build the argument vector directly so an identifier containing
    # whitespace cannot be split into extra arguments.
    argv = ['datanommer-latest', selector, identifier, '--timesince']
    sys.stderr.write("Running %r\n" % ' '.join(argv))

    process = subprocess.Popen(argv, shell=False,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()

    # communicate() returns bytes on Python 3; normalise to text.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8', 'replace')
    if not isinstance(stderr, str):
        stderr = stderr.decode('utf-8', 'replace')

    marker = "INFO] "
    if marker not in stdout:
        raise ValueError(
            "unexpected output from datanommer-latest (exit %s): %s"
            % (process.returncode, (stderr or stdout).strip()))

    payload = stdout.split(marker, 1)[1]
    data = json.loads(payload)
    return float(data[0])
|
||||
|
||||
|
||||
def main():
    """Run the check and exit with a Nagios status code.

    Expects three command-line arguments: the category (or topic), the
    warning threshold and the critical threshold, both in seconds.
    Exits 0 (OK), 1 (WARN) or 2 (CRIT).  Raises ValueError on bad usage or
    non-integer thresholds.
    """
    args = sys.argv[1:]
    if len(args) < 3:
        # Without this guard, sys.argv[-3:] would silently use the script
        # name as the identifier when one argument is missing.
        raise ValueError(
            "usage: check_datanommer_timesince "
            "CATEGORY WARNING_THRESH CRITICAL_THRESH")
    identifier, warning_threshold, critical_threshold = args[-3:]

    # Validate the thresholds before running the (slow) query.
    warning_threshold = int(warning_threshold)
    critical_threshold = int(critical_threshold)

    timesince = query_timesince(identifier)

    time_strings = []
    rd = dateutil.relativedelta.relativedelta(seconds=timesince)
    for denomination in ['years', 'months', 'days', 'hours', 'minutes', 'seconds']:
        value = getattr(rd, denomination, 0)
        if value:
            time_strings.append("%d %s" % (value, denomination))

    # Every component is zero when the message was seen just now.
    string = ", ".join(time_strings) or "0 seconds"
    reason = "datanommer has not seen a %r message in %s" % (identifier, string)

    # The doubled space matches what the original comma-style print emitted.
    if timesince > critical_threshold:
        print("CRIT:  %s" % reason)
        sys.exit(2)

    if timesince > warning_threshold:
        print("WARN:  %s" % reason)
        sys.exit(1)

    print("OK:  %s" % reason)
    sys.exit(0)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Any unexpected failure is reported to Nagios as UNKNOWN (exit 3).
    try:
        main()
    except Exception as e:
        print(" ".join(["UNKNOWN: ", str(e)]))
        sys.exit(3)
|
23
roles/nagios_client/files/scripts/check_fcomm_queue
Normal file
23
roles/nagios_client/files/scripts/check_fcomm_queue
Normal file
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python
|
||||
import sys
|
||||
|
||||
try:
    import retask.queue

    fcomm_queue = retask.queue.Queue('fedora-packages')
    fcomm_queue.connect()

    # Number of tasks currently waiting in the queue.
    items = fcomm_queue.length

    # More than 500 pending tasks is critical, more than 250 is a warning.
    if items > 500:
        print("CRITICAL: %i tasks in fcomm queue" % items)
        exit_code = 2
    elif items > 250:
        print("WARNING: %i tasks in fcomm queue" % items)
        exit_code = 1
    else:
        print("OK: %i tasks in fcomm queue" % items)
        exit_code = 0
    sys.exit(exit_code)

except Exception as e:
    # Import, connection or lookup failures all map to UNKNOWN.
    print(" ".join(["UNKNOWN:", str(e)]))
    sys.exit(3)
|
|
@ -0,0 +1,62 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import zmq
|
||||
|
||||
try:
    # Arguments: service name, consumer name, warning and critical backlog.
    service = sys.argv[1]
    check_consumer = sys.argv[2]
    backlog_warning = int(sys.argv[3])
    backlog_critical = int(sys.argv[4])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    # The monitoring socket has to exist and be writable by this user.
    if not os.path.exists(fname):
        print("UNKNOWN - %s does not exist" % fname)
        sys.exit(3)
    if not os.access(fname, os.W_OK):
        print("UNKNOWN - cannot write to %s" % fname)
        sys.exit(3)

    # Subscribe to everything published on the monitoring socket.
    context = zmq.Context()
    subscriber = context.socket(zmq.SUB)
    subscriber.connect("ipc:///%s" % fname)
    subscriber.setsockopt(zmq.SUBSCRIBE, '')

    poller = zmq.Poller()
    poller.register(subscriber, zmq.POLLIN)

    # Wait up to `timeout` milliseconds for one status message.
    timeout = 20000
    ready = dict(poller.poll(timeout))
    if ready.get(subscriber) != zmq.POLLIN:
        print('UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout)
        sys.exit(3)
    msg = json.loads(subscriber.recv())

    for consumer in msg['consumers']:
        if consumer['name'] != check_consumer:
            continue
        name = consumer['name']
        backlog = consumer['backlog']
        if backlog is None:
            print('ERROR: fedmsg consumer %s is not initialized' % name)
            sys.exit(3)
        if backlog > backlog_critical:
            print('CRITICAL: fedmsg consumer %s backlog value is %i' % (name, backlog))
            sys.exit(2)
        if backlog > backlog_warning:
            print('WARNING: fedmsg consumer %s backlog value is %i' % (name, backlog))
            sys.exit(1)
        print('OK: fedmsg consumer %s backlog value is %i' % (name, backlog))
        sys.exit(0)

    # No consumer in the status message had the requested name.
    print("UNKNOWN: fedmsg consumer %s not found" % check_consumer)
    sys.exit(3)
except Exception as err:
    print(" ".join(["UNKNOWN:", str(err)]))
    sys.exit(3)
|
|
@ -0,0 +1,58 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import zmq
|
||||
|
||||
try:
    # Arguments: service name, consumer name, warning and critical counts.
    service = sys.argv[1]
    check_consumer = sys.argv[2]
    exceptions_warning = int(sys.argv[3])
    exceptions_critical = int(sys.argv[4])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    # The monitoring socket has to exist and be writable by this user.
    if not os.path.exists(fname):
        print("UNKNOWN - %s does not exist" % fname)
        sys.exit(3)
    if not os.access(fname, os.W_OK):
        print("UNKNOWN - cannot write to %s" % fname)
        sys.exit(3)

    # Subscribe to everything published on the monitoring socket.
    context = zmq.Context()
    subscriber = context.socket(zmq.SUB)
    subscriber.connect("ipc:///%s" % fname)
    subscriber.setsockopt(zmq.SUBSCRIBE, '')

    poller = zmq.Poller()
    poller.register(subscriber, zmq.POLLIN)

    # Wait up to `timeout` milliseconds for one status message.
    timeout = 20000
    ready = dict(poller.poll(timeout))
    if ready.get(subscriber) != zmq.POLLIN:
        print('UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout)
        sys.exit(3)
    msg = json.loads(subscriber.recv())

    for consumer in msg['consumers']:
        if consumer['name'] != check_consumer:
            continue
        name = consumer['name']
        if consumer['exceptions'] > exceptions_critical:
            print('CRITICAL: fedmsg consumer %s exceptions value is %i' % (name, consumer['exceptions']))
            sys.exit(2)
        if consumer['exceptions'] > exceptions_warning:
            print('WARNING: fedmsg consumer %s exceptions value is %i' % (name, consumer['exceptions']))
            sys.exit(1)
        print('OK: fedmsg consumer %s exceptions value is %i' % (name, consumer['exceptions']))
        sys.exit(0)

    # No consumer in the status message had the requested name.
    print("UNKNOWN: fedmsg consumers %s not found" % check_consumer)
    sys.exit(3)
except Exception as err:
    print(" ".join(["UNKNOWN:", str(err)]))
    sys.exit(3)
|
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import arrow
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import zmq
|
||||
|
||||
try:
    # Arguments: service name, producer name, warning and critical ages
    # (compared against seconds elapsed since the producer last ran).
    service = sys.argv[1]
    check_producer = sys.argv[2]
    elapsed_warning = int(sys.argv[3])
    elapsed_critical = int(sys.argv[4])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    # The monitoring socket has to exist and be writable by this user.
    if not os.path.exists(fname):
        print("UNKNOWN - %s does not exist" % fname)
        sys.exit(3)
    if not os.access(fname, os.W_OK):
        print("UNKNOWN - cannot write to %s" % fname)
        sys.exit(3)

    # Subscribe to everything published on the monitoring socket.
    context = zmq.Context()
    subscriber = context.socket(zmq.SUB)
    subscriber.connect("ipc:///%s" % fname)
    subscriber.setsockopt(zmq.SUBSCRIBE, '')

    poller = zmq.Poller()
    poller.register(subscriber, zmq.POLLIN)

    # Wait up to `timeout` milliseconds for one status message.
    timeout = 20000
    ready = dict(poller.poll(timeout))
    if ready.get(subscriber) != zmq.POLLIN:
        print('UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout)
        sys.exit(3)
    msg = json.loads(subscriber.recv())

    now = time.time()

    for prod in msg['producers']:
        if prod['name'] == check_producer:
            diff = now - prod['last_ran']
            # Human-readable rendering of the last run time for the report.
            then = arrow.get(prod['last_ran']).humanize()
            report = (check_producer, then, diff)
            if diff > elapsed_critical:
                print("CRITICAL: %s last ran %s (%i seconds ago)" % report)
                sys.exit(2)
            if diff > elapsed_warning:
                print("WARNING: %s last ran %s (%i seconds ago)" % report)
                sys.exit(1)
            print("OK: %s last ran %s (%i seconds ago)" % report)
            sys.exit(0)

    # No producer in the status message had the requested name.
    print("UNKNOWN: fedmsg producer %s not found" % check_producer)
    sys.exit(3)
except Exception as err:
    print(" ".join(["UNKNOWN:", str(err)]))
    sys.exit(3)
|
|
@ -0,0 +1,64 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import zmq
|
||||
|
||||
try:
    # Arguments: service name followed by the consumer/producer names to check.
    service = sys.argv[1]
    check_list = frozenset(sys.argv[2:])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    if not check_list:
        print("UNKNOWN - empty list of fedmsg consumers and producers to check")
        sys.exit(3)

    # The monitoring socket has to exist and be writable by this user.
    if not os.path.exists(fname):
        print("UNKNOWN - %s does not exist" % fname)
        sys.exit(3)
    if not os.access(fname, os.W_OK):
        print("UNKNOWN - cannot write to %s" % fname)
        sys.exit(3)

    # Subscribe to everything published on the monitoring socket.
    context = zmq.Context()
    subscriber = context.socket(zmq.SUB)
    subscriber.connect("ipc:///%s" % fname)
    subscriber.setsockopt(zmq.SUBSCRIBE, '')

    poller = zmq.Poller()
    poller.register(subscriber, zmq.POLLIN)

    # Wait up to `timeout` milliseconds for one status message.
    timeout = 20000
    ready = dict(poller.poll(timeout))
    if ready.get(subscriber) != zmq.POLLIN:
        print('UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout)
        sys.exit(3)
    msg = json.loads(subscriber.recv())

    # Any requested plugin that reports itself uninitialized is critical.
    for consumer in msg['consumers']:
        if consumer['name'] not in check_list:
            continue
        if not consumer['initialized']:
            print('ERROR: fedmsg consumer %s is not initialized' % consumer['name'])
            sys.exit(2)

    for producer in msg['producers']:
        if producer['name'] not in check_list:
            continue
        if not producer['initialized']:
            print('ERROR: fedmsg producer %s is not initialized' % producer['name'])
            sys.exit(2)

    # Every requested name must also be present in the status message.
    installed = [p['name'] for p in msg['producers'] + msg['consumers']]
    for item in check_list:
        if item not in installed:
            print('ERROR: %s not found among installed plugins' % item)
            sys.exit(2)

    print("OK: fedmsg consumer(s) and producer(s) initialized")
    sys.exit(0)

except Exception as err:
    print(" ".join(["UNKNOWN:", str(err)]))
    sys.exit(3)
|
76
roles/nagios_client/files/scripts/check_haproxy_conns.py
Executable file
76
roles/nagios_client/files/scripts/check_haproxy_conns.py
Executable file
|
@ -0,0 +1,76 @@
|
|||
#!/usr/bin/env python
|
||||
""" Nagios check for haproxy over-subscription.
|
||||
|
||||
fedmsg-gateway is the primary concern as it can eat up a ton of simultaneous
|
||||
connections.
|
||||
|
||||
:Author: Ralph Bean <rbean@redhat.com>
|
||||
"""
|
||||
|
||||
import socket
|
||||
import sys
|
||||
|
||||
|
||||
def _numeric(value):
|
||||
""" Type casting utility """
|
||||
try:
|
||||
return int(value)
|
||||
except ValueError:
|
||||
try:
|
||||
return float(value)
|
||||
except ValueError:
|
||||
return value
|
||||
|
||||
|
||||
def query(sockname="/var/run/haproxy-stat"):
    """ Read stats from the haproxy socket and return a dict.

    Sends ``show info`` to the UNIX socket at ``sockname`` and parses the
    ``Key: value`` lines of the reply.  Values are passed through
    ``_numeric``.  Lines without a ':' separator are skipped.

    Socket errors (including a timeout) propagate to the caller, which is
    expected to report them as UNKNOWN.
    """
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        # Bound the read-until-EOF loop below so a stuck socket cannot
        # hang the check forever.
        s.settimeout(10)
        # Honour the caller-supplied path (it used to be hard-coded here).
        s.connect(sockname)
        s.sendall(b'show info\n')

        # Read until the peer closes the connection; a single recv() may
        # return only part of the reply.
        chunks = []
        while True:
            chunk = s.recv(4096)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        s.close()

    response = b''.join(chunks)
    if not isinstance(response, str):
        # Python 3: sockets return bytes.
        response = response.decode('utf-8', 'replace')

    data = {}
    for line in response.strip().split('\n'):
        # Split on the first ':' only, so values containing ':' survive.
        key, sep, value = line.partition(':')
        if not sep:
            continue
        data[key.strip()] = _numeric(value.strip())
    return data
|
||||
|
||||
|
||||
def nagios_check(data):
    """ Print warnings and return nagios exit codes.

    ``data`` is the mapping produced from haproxy's ``show info`` output;
    it must contain numeric ``CurrConns`` and ``Maxconn`` entries.

    Returns 0 (OK) below 50% subscription, 1 (WARN) below 75%, 2 (CRIT) up
    to 100%, and 3 (UNKNOWN) above 100% or when the data is missing,
    malformed, or has a zero ``Maxconn``.
    """
    try:
        current = data['CurrConns']
        maxconn = data['Maxconn']
        percent = 100 * float(current) / float(maxconn)
        details = "%.2f%% subscribed. %i current of %i maxconn." % (
            percent, current, maxconn,
        )
    except (KeyError, TypeError, ValueError, ZeroDivisionError) as e:
        # data is None, lacks a key, holds a non-numeric value, or
        # Maxconn is 0: report UNKNOWN instead of dying with a traceback.
        print("HAPROXY SUBS UNKNOWN: could not evaluate haproxy stats (%r)" % (e,))
        return 3

    if percent < 50:
        print("HAPROXY SUBS OK: " + details)
        return 0

    if percent < 75:
        print("HAPROXY SUBS WARN: " + details)
        return 1

    if percent <= 100:
        print("HAPROXY SUBS CRIT: " + details)
        return 2

    print("HAPROXY SUBS UNKNOWN: " + details)
    return 3
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # A failure to query haproxy is reported as UNKNOWN (exit 3);
    # otherwise the exit code comes from nagios_check().
    try:
        stats = query(sockname="/var/run/haproxy-stat")
    except Exception as e:
        print("HAPROXY SUBS UNKNOWN: " + str(e))
        sys.exit(3)
    else:
        sys.exit(nagios_check(stats))
|
59
roles/nagios_client/files/scripts/check_haproxy_mirrorlist.py
Executable file
59
roles/nagios_client/files/scripts/check_haproxy_mirrorlist.py
Executable file
|
@ -0,0 +1,59 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import socket
|
||||
import sys
|
||||
|
||||
|
||||
def fetch_stats(unixsocket="/var/run/haproxy-stat"):
    """Send ``show stat`` to the haproxy UNIX socket and return the reply text."""
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        # Bound the read-until-EOF loop so a stuck socket cannot hang the check.
        s.settimeout(10)
        s.connect(unixsocket)
        s.sendall(b'show stat\n')
        # A single recv() may return only part of a long CSV reply.
        chunks = []
        while True:
            chunk = s.recv(16384)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        s.close()
    raw = b''.join(chunks)
    if not isinstance(raw, str):
        # Python 3: sockets return bytes.
        raw = raw.decode('utf-8', 'replace')
    return raw


def parse_stats(raw):
    """Turn the CSV reply into a list of dicts keyed by the header fields."""
    output = raw.strip().split('\n')
    fields = output.pop(0).split(',')
    # The header line starts with "# "; drop it from the first field name.
    fields[0] = fields[0].replace('# ', '')
    return [dict(zip(fields, line.split(','))) for line in output if line.strip()]


def evaluate(proxies):
    """Return ``(exit_code, message)`` for the mirror-lists backend servers.

    FRONTEND/BACKEND summary rows are ignored.  OK when no server is DOWN,
    WARN below 50% down, CRIT at 50% or more, UNKNOWN when no mirror-lists
    server rows are present at all.
    """
    total = 0
    downcount = 0
    downlist = ""
    for proxy in proxies:
        if proxy.get('svname') in ("FRONTEND", "BACKEND"):
            continue
        if proxy.get('pxname') == "mirror-lists":
            total += 1
            if proxy.get('status') == "DOWN":
                downlist += proxy["svname"] + " "
                downcount += 1

    if total == 0:
        # Avoid dividing by zero when the backend is absent from the stats.
        return 3, "MIRRORLIST STATE UNKNOWN: no mirror-lists servers found in haproxy stats"

    unavailability = 100 * float(downcount) / float(total)

    if unavailability == 0:
        return 0, "MIRRORLIST STATE OK: " + downlist
    if unavailability < 50:
        return 1, "MIRRORLIST STATE WARN: " + downlist
    return 2, "MIRRORLIST STATE CRIT: " + downlist


def main():
    """Query haproxy and exit with the matching Nagios status code."""
    try:
        proxies = parse_stats(fetch_stats())
    except Exception as e:
        # Any socket or parse failure is UNKNOWN rather than a later NameError.
        print("MIRRORLIST STATE UNKNOWN: " + str(e))
        sys.exit(3)

    code, message = evaluate(proxies)
    print(message)
    sys.exit(code)


if __name__ == '__main__':
    main()
|
74
roles/nagios_client/files/scripts/check_ipa_replication
Normal file
74
roles/nagios_client/files/scripts/check_ipa_replication
Normal file
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/python
|
||||
# Source: https://github.com/opinkerfi/nagios-plugins/blob/master/check_ipa/check_ipa_replication
|
||||
# Copyright 2013, Tomas Edwardsson
|
||||
# Copyright 2016, Patrick Uiterwijk
|
||||
#
|
||||
# This script is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This script is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import ldap
|
||||
from pynag.Plugins import PluginHelper, critical, warning, ok
|
||||
|
||||
plugin = PluginHelper()

plugin.parser.add_option('-u', help="ldap uri", dest="uri")
plugin.parser.add_option('-D', help="bind DN", dest="binddn")
plugin.parser.add_option('-w', help="bind password", dest="bindpw")
plugin.parse_arguments()

if not plugin.options.uri:
    plugin.parser.error('-u (uri) argument is required')

try:
    l = ldap.initialize(plugin.options.uri)

    # Bind only when a DN was supplied; otherwise search without binding.
    if plugin.options.binddn:
        l.bind_s(plugin.options.binddn, plugin.options.bindpw)

    replication = l.search_s('cn=config',
                             ldap.SCOPE_SUBTREE,
                             '(objectclass=nsds5replicationagreement)',
                             ['nsDS5ReplicaHost', 'nsds5replicaLastUpdateStatus'])
except Exception as e:
    plugin.status(critical)
    plugin.add_summary("Unable to initialize ldap connection: %s" % (e))
    plugin.exit()


# Loop through replication agreements
for rhost in replication:
    host = rhost[1]['nsDS5ReplicaHost'][0]
    status = rhost[1]['nsds5replicaLastUpdateStatus'][0]
    plugin.add_summary("Replica %s Status: %s" % (host, status))

    if status.startswith('Error ('):
        # IPA >=4.4.0: the code sits inside the parentheses
        code = status[status.find('(')+1:status.find(')')]
    else:
        # IPA <4.4.0: the code is the first space-separated token.
        # split() keeps a bare code with no trailing text intact, where
        # slicing up to find(' ') == -1 would drop its last character.
        code = status.split(' ', 1)[0]

    if code in ('0', '1'):
        # '1' (busy replica) is not an error, its "unknown" (but its "ok" for now)
        plugin.status(ok)
    else:
        plugin.status(critical)

if not replication:
    plugin.add_summary("Warning: No replicas found")
    plugin.status(warning)

plugin.exit()
|
||||
|
17
roles/nagios_client/files/scripts/check_lock
Normal file
17
roles/nagios_client/files/scripts/check_lock
Normal file
|
@ -0,0 +1,17 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import fcntl
|
||||
import sys
|
||||
|
||||
path = '/mnt/koji/.nagios_test'

try:
    # Opening for read first requires the test file to exist already;
    # it is then reopened for writing so it can be locked.
    f = open(path, 'r')
    f.close()
    f = open(path, 'w')
except IOError:
    print("Could not create file")
    sys.exit(2)

try:
    fcntl.flock(f, fcntl.LOCK_EX)
except (IOError, OSError) as e:
    # A failed lock is the condition this check looks for; report it as
    # CRITICAL instead of dying with a traceback.
    print("Could not lock file: %s" % e)
    sys.exit(2)
finally:
    f.close()

print("File Locked Successfully")
sys.exit(0)
|
123
roles/nagios_client/files/scripts/check_lock_file_age
Executable file
123
roles/nagios_client/files/scripts/check_lock_file_age
Executable file
|
@ -0,0 +1,123 @@
|
|||
#! /usr/bin/perl -w
|
||||
|
||||
# check_lock_file_age.pl Copyright (C) 2010 Ricky Elrod <codeblock@fedoraproject.org>
|
||||
#
|
||||
# Fork of check_file_age.pl
|
||||
#
|
||||
# Checks a lock file's size and modification time to make sure it's not empty
|
||||
# and that it's sufficiently recent.
|
||||
#
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# you should have received a copy of the GNU General Public License
|
||||
# along with this program (or with Nagios); if not, write to the
|
||||
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
# Boston, MA 02111-1307, USA
|
||||
|
||||
use strict;
|
||||
use English;
|
||||
use Getopt::Long;
|
||||
use File::stat;
|
||||
use vars qw($PROGNAME);
|
||||
use lib "/usr/lib64/nagios/plugins";
|
||||
use utils qw (%ERRORS &print_revision &support);
|
||||
|
||||
sub print_help ();
|
||||
sub print_usage ();
|
||||
|
||||
my ($opt_c, $opt_f, $opt_w, $opt_h, $opt_V);
my ($result, $message, $age, $size, $st);

$PROGNAME="check_lock_file_age";

# Default thresholds, in minutes (converted to seconds further down).
$opt_w = 1;
$opt_c = 5;
$opt_f = "";

Getopt::Long::Configure('bundling');
GetOptions(
    "V"   => \$opt_V, "version"        => \$opt_V,
    "h"   => \$opt_h, "help"           => \$opt_h,
    # "file=s" (was "file"): the long option must take a string value,
    # otherwise --file PATH stores 1 instead of the path.
    "f=s" => \$opt_f, "file=s"         => \$opt_f,
    "w=f" => \$opt_w, "warning-age=f"  => \$opt_w,
    "c=f" => \$opt_c, "critical-age=f" => \$opt_c);

if ($opt_V) {
    print_revision($PROGNAME, '1.4.14');
    exit $ERRORS{'OK'};
}

if ($opt_h) {
    print_help();
    exit $ERRORS{'OK'};
}

if (($opt_c and $opt_w) and ($opt_c < $opt_w)) {
    print "Warning time must be less than Critical time.\n";
    exit $ERRORS{'UNKNOWN'};
}

# Fall back to the first positional argument when -f was not given.
$opt_f = shift unless ($opt_f);

if (! $opt_f) {
    print "LOCK_FILE_AGE UNKNOWN: No file specified\n";
    exit $ERRORS{'UNKNOWN'};
}

# Check that file exists (can be directory or link)
# A missing lock file is the healthy state for this check.
unless (-e $opt_f) {
    print "LOCK_FILE_AGE OK: File not found (Lock file removed) - $opt_f\n";
    exit $ERRORS{'OK'};
}

$st = File::stat::stat($opt_f);
unless ($st) {
    # stat can still fail after the -e test (e.g. the file was removed
    # in between); report that instead of dying on an undefined value.
    print "LOCK_FILE_AGE UNKNOWN: Cannot stat $opt_f: $!\n";
    exit $ERRORS{'UNKNOWN'};
}
$age = time - $st->mtime;

$result = 'OK';

# Convert minutes to seconds
if($opt_c) { $opt_c *= 60; }
if($opt_w) { $opt_w *= 60; }

if ($opt_c and $age > $opt_c) {
    $result = 'CRITICAL';
}
elsif ($opt_w and $age > $opt_w) {
    $result = 'WARNING';
}

# If the age is higher than 2 minutes, convert seconds -> minutes
# If it's higher than a day, use days.
# Just a nicety, to make people not have to do math ;)
if($age > 86400) { $age = int(($age/86400))." days"; }
elsif($age > 120) { $age = int(($age/60))." minutes"; }
else { $age = "$age seconds"; }

print "LOCK_FILE_AGE $result: $opt_f is $age old.\n";
exit $ERRORS{$result};
|
||||
|
||||
# Print the command-line synopsis.  The thresholds are given in minutes:
# the main script multiplies -w/-c by 60 before comparing with the age.
sub print_usage () {
    print "Usage:\n";
    print " $PROGNAME [-w <mins>] [-c <mins>] -f <file>\n";
    print " $PROGNAME [-h | --help]\n";
    print " $PROGNAME [-V | --version]\n";
}
|
||||
|
||||
# Print the full help text: revision, copyright, usage, the meaning of
# the threshold arguments, and the standard support footer.
sub print_help () {
    print_revision($PROGNAME, '1.4.14');
    print "Copyright (c) 2010 Ricky Elrod\n\n";
    print_usage();
    print "\n",
          " <mins> File must be no more than this many minutes old (default: warn 1m, crit 5m)\n",
          "\n";
    support();
}
|
24
roles/nagios_client/files/scripts/check_memcache_connect
Normal file
24
roles/nagios_client/files/scripts/check_memcache_connect
Normal file
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# 2014-11-19
|
||||
# Author: Ralph Bean <rbean@redhat.com>
|
||||
|
||||
# Nagios exit codes
ok=0
warn=1
crit=2
unkn=3

# Right now we just check to see if we can even run this command without
# hanging and timing out.  In the future, we could parse stdout for more
# fine-grained information.
echo stats | nc 127.0.0.1 11211 > /dev/null
status=$?

# A zero pipeline status is OK; anything else is critical.
if [ $status -eq 0 ]; then
    echo "OK: stats command got status code $status"
    exit $ok
fi

echo "CRIT: stats command got status code $status"
exit $crit
|
14
roles/nagios_client/files/scripts/check_osbs_api.py
Executable file
14
roles/nagios_client/files/scripts/check_osbs_api.py
Executable file
|
@ -0,0 +1,14 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import requests
|
||||
import sys
|
||||
|
||||
# Query the local OSBS API endpoint.  Certificate verification is
# disabled in this request, as it was before.
try:
    r = requests.get("https://localhost:8443/", verify=False)
    payload = r.json()
except Exception as e:
    # Connection failures and non-JSON replies mean the API is not
    # healthy; report CRITICAL rather than dying with a traceback.
    print("CRITICAL: OSBS API not responding properly (%s)" % e)
    sys.exit(2)

if isinstance(payload, dict) and 'paths' in payload:
    print("OK: OSBS API endpoint is responding with path data")
    sys.exit(0)
else:
    print("CRITICAL: OSBS API not responding properly")
    sys.exit(2)
|
||||
|
23
roles/nagios_client/files/scripts/check_osbs_builds.py
Executable file
23
roles/nagios_client/files/scripts/check_osbs_builds.py
Executable file
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# Run "osbs list-builds" and inspect what it writes to stderr.
try:
    sp = subprocess.Popen(
        ["osbs", "list-builds"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE
    )
    sp_out, sp_err = sp.communicate()
except OSError as e:
    # The osbs client is missing or could not be executed.
    print("CRITICAL: OSBS UNRESPONSIVE (%s)" % e)
    sys.exit(2)

if not isinstance(sp_err, str):
    # Python 3: pipes return bytes.
    sp_err = sp_err.decode('utf-8', 'replace')
sp_err = sp_err.split('\n')

# Skip the notice osbs prints when it is not attached to a terminal.
if 'not attached to terminal' in sp_err[0]:
    sp_err = sp_err[1:]

# The first remaining line is expected to be the table header, which
# starts with "BUILD".  Empty or blank output counts as unresponsive
# instead of raising IndexError.
header = sp_err[0].split() if sp_err else []

if header and header[0] == 'BUILD':
    print("OK: OSBS is responsive to 'osbs list-builds'")
    sys.exit(0)
else:
    print("CRITICAL: OSBS UNRESPONSIVE")
    sys.exit(2)
|
49
roles/nagios_client/files/scripts/check_postfix_queue
Normal file
49
roles/nagios_client/files/scripts/check_postfix_queue
Normal file
|
@ -0,0 +1,49 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# 19-07-2010
|
||||
# Author: Cherwin Nooitmeer <cherwin@gmail.com>
|
||||
#
|
||||
|
||||
# exit codes
e_ok=0
e_warning=1
e_critical=2
e_unknown=3

# regular expression that matches queue IDs (e.g. D71EF7AC80F8)
queue_id='^[A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9]'

usage="Invalid command line usage"

if [ -z "$1" ]; then
    echo "$usage"
    exit $e_unknown
fi

while getopts ":w:c:" options
do
    case $options in
        w ) warning=$OPTARG ;;
        c ) critical=$OPTARG ;;
        * ) echo "$usage"
            exit $e_unknown ;;
    esac
done

# Both thresholds are required and must be non-negative integers.
# Without this, a missing -w or -c left $retval empty and the script
# exited 0 (OK) after the numeric tests failed.
for threshold in "$warning" "$critical"
do
    case $threshold in
        ''|*[!0-9]* ) echo "$usage"
                      exit $e_unknown ;;
    esac
done

# determine queue size
# (the pattern is quoted so the shell cannot glob-expand its brackets)
qsize=$(mailq | egrep -c "$queue_id")
if [ -z "$qsize" ]
then
    exit $e_unknown
fi

if [ "$qsize" -ge "$critical" ]; then
    retval=$e_critical
elif [ "$qsize" -ge "$warning" ]; then
    retval=$e_warning
else
    retval=$e_ok
fi

echo "$qsize mail(s) in queue | mail_queue=$qsize"
exit $retval
|
26
roles/nagios_client/files/scripts/check_rabbitmq_size
Normal file
26
roles/nagios_client/files/scripts/check_rabbitmq_size
Normal file
|
@ -0,0 +1,26 @@
|
|||
#!/bin/python
"""Nagios check for the depth and consumer count of one RabbitMQ queue.

Usage: check_rabbitmq_size <queue> <threshold> <threshold>

The queue is looked up in the default ("/") vhost through the HTTP API on
localhost:15672.  The two thresholds are message counts: the smaller one is
used as the warning level and the larger one as the critical level, so the
order in which they are given does not matter.

Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (bad command line).
"""
import sys
import requests

if len(sys.argv) < 4:
    print('UNKNOWN: usage: %s <queue> <threshold> <threshold>' % sys.argv[0])
    sys.exit(3)

# sys.argv holds strings; comparing them with the integer message count
# never triggers on Python 2 (and raises TypeError on Python 3), so convert
# them up front.
try:
    warning, critical = sorted(int(arg) for arg in sys.argv[2:4])
except ValueError:
    print('UNKNOWN: thresholds must be integers')
    sys.exit(3)

url = 'http://localhost:15672/api/queues/%%2f/%s' % (sys.argv[1])

r = requests.get(url, auth=('guest', 'guest')).json()
consumers = r['consumers']
messages = r['messages']

msg = 'Messages in queue: %i (%i consumers)' % (messages, consumers)

# A queue nobody reads from is critical regardless of its current depth.
if consumers < 1:
    print('CRITICAL: No consumers: %s' % msg)
    sys.exit(2)

if messages > critical:
    print('CRITICAL: %s' % msg)
    sys.exit(2)

if messages > warning:
    print('WARNING: %s' % msg)
    sys.exit(1)

print('OK: %s' % msg)
sys.exit(0)
|
45
roles/nagios_client/files/scripts/check_raid.py
Normal file
45
roles/nagios_client/files/scripts/check_raid.py
Normal file
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env python
#
# very simple python script to parse out /proc/mdstat
# and give results for nagios to monitor
#

import re
import sys

# Member-state token of an array, e.g. "[UU]" (healthy) or "[U_]" (degraded).
# It is searched for anywhere on the line because its position depends on
# the superblock format ("... blocks [2/2] [UU]" versus
# "... blocks super 1.2 [2/2] [UU]").
STATUS_RE = re.compile(r'\[[U_]+\]')


def check_mdstat(mdstat):
    """Inspect the lines of /proc/mdstat.

    mdstat is a list of lines without trailing newlines.

    Returns a tuple (devices, error): devices is the list of md device
    names in file order; error is an empty string when no array has a
    missing member, otherwise a comma separated description of every
    degraded array.
    """
    devices = []
    problems = []
    for i, line in enumerate(mdstat):
        if line[0:2] != 'md':
            continue
        device = line.split()[0]
        devices.append(device)

        # The line after the "mdX : ..." header carries the member states.
        detail = mdstat[i + 1] if i + 1 < len(mdstat) else ''
        found = STATUS_RE.search(detail)
        if found is None or '_' not in found.group(0):
            continue

        msg = "device=%s status=%s" % (device, found.group(0))
        # see if we can figure out what's going on: the second line after
        # the header, when not blank, is reported as the rebuild indicator
        progress = mdstat[i + 2].split() if i + 2 < len(mdstat) else []
        if progress:
            msg = msg + " rebuild=%s" % progress[0]
        problems.append(msg)
    return devices, ", ".join(problems)


def main():
    """Run the check against /proc/mdstat and exit with a nagios status."""
    try:
        with open('/proc/mdstat') as handle:
            mdstat = handle.read().split('\n')
    except IOError:
        # seems we have no software raid on this machine
        sys.exit(0)

    devices, error = check_mdstat(mdstat)
    if not error:
        print("DEVICES %s OK" % " ".join(devices))
        sys.exit(0)

    print(error)
    sys.exit(2)


if __name__ == '__main__':
    main()
|
||||
|
84
roles/nagios_client/files/scripts/check_readonly_fs
Executable file
84
roles/nagios_client/files/scripts/check_readonly_fs
Executable file
|
@ -0,0 +1,84 @@
|
|||
#!/bin/bash

# check_readonlyfs: Check for readonly filesystems
# Copyright (C) 2010 Davide Madrisan <davide.madrisan@gmail.com>
#
# Reads /proc/mounts and reports every mount point whose option list
# contains "ro".  A fixed set of pseudo filesystems is always ignored;
# network filesystems are ignored when --no-network-fs is given.

PROGNAME=$(/bin/basename $0)
PROGPATH=$(echo $0 | sed -e 's,[\/][^\/][^\/]*$,,')
REVISION=$(echo '$Revision: 1 $' | sed -e 's/[^0-9.]//g')

# NOTE(review): STATE_*, print_revision and support are not defined in this
# script; presumably they are provided by utils.sh -- confirm.
. $PROGPATH/utils.sh

# Print the three supported invocations.
print_usage() {
    for flag in --no-network-fs --help --version; do
        echo "Usage: $PROGNAME $flag"
    done
}

# Print revision, usage and a one-line description.
print_help() {
    print_revision $PROGNAME $REVISION
    echo ""
    print_usage
    printf '\n%s\n\n' "readonly filesystem checker plugin for Nagios"
    support
}

# 1 = also check network filesystems, 0 = skip them
NETFS=1

# Status used when at least one read-only filesystem is found.
exitstatus=$STATE_WARNING #default

# Grab the command line arguments
while [ -n "$1" ]; do
    case "$1" in
        -h|--help)
            print_help
            exit $STATE_OK
            ;;
        -V|--version)
            print_revision $PROGNAME $REVISION
            exit $STATE_OK
            ;;
        -n|--no-network-fs)
            NETFS="0"
            ;;
        *)
            echo "Unknown argument: $1"
            print_usage
            exit $STATE_UNKNOWN
            ;;
    esac
    shift
done

if ! [ -r /proc/mounts ]; then
    echo "cannot read /proc/mounts!"
    exit $STATE_UNKNOWN
fi

ro_count=0
ro_mounts=""
default_ifs="$IFS"

while read dev mp fs mopt ignore; do
    if [ "$dev" = none ]; then
        continue
    fi

    case $fs in
        binfmt_misc|devpts|iso9660|proc|selinuxfs|rpc_pipefs|sysfs|tmpfs|usbfs)
            continue ;;
        autofs|nfs|nfs4|smbfs)
            # skip the network filesystems
            [ "$NETFS" = 0 ] && continue ;;
    esac

    # Split the comma separated mount options into words.
    IFS=","; opts=($mopt); IFS="$default_ifs"
    for opt in "${opts[@]}"; do
        # An empty option ends the scan of this entry.
        [ -z "$opt" ] && break
        if [ "$opt" = ro ]; then
            ro_mounts="$ro_mounts $mp"
            ro_count=$(( ro_count + 1 ))
        fi
    done
done < <(LC_ALL=C /bin/cat /proc/mounts 2>/dev/null)

if [ $ro_count -eq 0 ]; then
    echo OK
    exit $STATE_OK
fi

echo "$ro_mounts: read only fs"
exit $exitstatus
|
108
roles/nagios_client/files/scripts/check_supybot_plugin
Executable file
108
roles/nagios_client/files/scripts/check_supybot_plugin
Executable file
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/env python
""" check_supybot_plugin -- ensure that a plugin is loaded by supybot.

Run like:

    check_supybot_plugin --target fedmsg
    check_supybot_plugin --target koji --debug

The script connects to an IRC server, sends "list" to zodbot in a private
message once the server has sent a MODE line, and looks for the target in
the first PRIVMSG it receives.

Exit codes: 0 plugin loaded, 2 plugin not loaded, 3 unknown/error.
"""

import argparse
import sys
import socket
import uuid


def process_args():
    """Parse the command line and return the argparse namespace."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-t', '--target', default=None, dest='target',
        help="Required.  The plugin we're looking for."
    )
    parser.add_argument(
        '-n', '--nick', default=None, dest='nick',
        help="NICK to use when connecting to freenode.",
    )
    parser.add_argument(
        '-d', '--debug', default=False, action='store_true',
        help='Print out debug information.', dest='debug',
    )
    parser.add_argument(
        '-H', '--host', default='irc.freenode.net',
        help='Host to connect to.', dest='host',
    )
    parser.add_argument(
        '-p', '--port', default=6667, type=int,
        help='Port to connect to.', dest='port',
    )
    return parser.parse_args()


args = process_args()

# Use a random nick so people can't mess with us
if not args.nick:
    args.nick = 'nrpe-' + str(uuid.uuid4()).split('-')[0]

name = "NRPE Bot"
readbuffer = ""

if not args.target:
    print("UNKNOWN: No 'target' specified.")
    sys.exit(3)

args.target = args.target.lower()

if args.debug:
    print("connecting to %s/%i" % (args.host, args.port))

try:
    s = socket.socket()
    s.connect((args.host, args.port))

    if args.debug:
        print("as %s/%s (%s)" % (args.nick, args.nick, name))

    s.send("nick %s\r\n" % args.nick)
    s.send("USER %s %s bla :%s\r\n" % (args.nick, args.host, name))

    while 1:
        chunk = s.recv(1024)
        if not chunk:
            # recv() returns an empty string once the peer has closed the
            # connection; without this check the loop would spin forever.
            raise IOError(
                "connection closed by %s before a reply arrived" % args.host)

        readbuffer = readbuffer + chunk
        temp = readbuffer.split("\n")
        # The last element is an incomplete line; keep it for the next read.
        readbuffer = temp.pop()

        for line in temp:
            line = line.rstrip()

            if args.debug:
                print(" *  %s" % line)

            line = line.split()

            # Blank or single-token lines carry no command to act on.
            if len(line) < 2:
                continue

            if line[1] == 'MODE':
                msg = "privmsg zodbot :list\r\n"
                if args.debug:
                    print("sending:")
                    print(" -> %s" % msg)
                s.send(msg)

            if line[1] == 'PRIVMSG':
                if args.debug:
                    print("Got our response..")

                # Everything after the fourth token, split on ", ".
                plugins = [
                    plugin.lower()
                    for plugin in ' '.join(line[4:]).split(', ')
                ]

                if args.target in plugins:
                    print("OK")
                    s.send("QUIT\r\n")
                    sys.exit(0)
                else:
                    print("CRITICAL: %r not loaded by supybot" % args.target)
                    s.send("QUIT\r\n")
                    sys.exit(2)
except Exception as e:
    # sys.exit() raises SystemExit, which is not an Exception subclass, so
    # the OK/CRITICAL exits above pass through untouched.
    print("UNKNOWN:  %s" % str(e))
    if args.debug:
        raise
    sys.exit(3)
|
19
roles/nagios_client/files/scripts/check_testcloud
Normal file
19
roles/nagios_client/files/scripts/check_testcloud
Normal file
|
@ -0,0 +1,19 @@
|
|||
#!/bin/bash

# Nagios plugin: counts the lines of "testcloud instance list" that mention
# "running" (case-insensitive) and compares the count with fixed thresholds.

WARNING=15
CRITICAL=20
RUNNING_VMS=$(testcloud instance list | grep -c -i 'running')

# Default to OK; raise the state only when a threshold is exceeded.
state="OK"
rc=0
if [ $RUNNING_VMS -gt $CRITICAL ]; then
    state="CRITICAL"
    rc=2
elif [ $RUNNING_VMS -gt $WARNING ]; then
    state="WARNING"
    rc=1
fi

echo "Testcloud: $state Number of VMs running: $RUNNING_VMS"
exit $rc
|
BIN
roles/nagios_client/files/selinux/fi-nrpe.mod
Normal file
BIN
roles/nagios_client/files/selinux/fi-nrpe.mod
Normal file
Binary file not shown.
BIN
roles/nagios_client/files/selinux/fi-nrpe.pp
Normal file
BIN
roles/nagios_client/files/selinux/fi-nrpe.pp
Normal file
Binary file not shown.
11
roles/nagios_client/files/selinux/fi-nrpe.te
Normal file
11
roles/nagios_client/files/selinux/fi-nrpe.te
Normal file
|
@ -0,0 +1,11 @@
|
|||
module fi-nrpe 1.0;
|
||||
|
||||
require {
|
||||
type nagios_system_plugin_t;
|
||||
type nrpe_exec_t;
|
||||
class file getattr;
|
||||
}
|
||||
|
||||
#============= nagios_system_plugin_t ==============
|
||||
allow nagios_system_plugin_t nrpe_exec_t:file getattr;
|
||||
|
3
roles/nagios_client/handlers/main.yml
Normal file
3
roles/nagios_client/handlers/main.yml
Normal file
|
@ -0,0 +1,3 @@
|
|||
---
|
||||
- name: restart nrpe
|
||||
service: name=nrpe state=restarted
|
228
roles/nagios_client/tasks/main.yml
Normal file
228
roles/nagios_client/tasks/main.yml
Normal file
|
@ -0,0 +1,228 @@
|
|||
# nagios-client/nrpe
|
||||
|
||||
---
|
||||
# install pkgs:
|
||||
- name: install nagios client pkgs
|
||||
yum: name={{ item }} state=present
|
||||
with_items:
|
||||
- nrpe
|
||||
- nagios-plugins
|
||||
- nagios-plugins-disk
|
||||
- nagios-plugins-file_age
|
||||
- nagios-plugins-users
|
||||
- nagios-plugins-procs
|
||||
- nagios-plugins-swap
|
||||
- nagios-plugins-load
|
||||
- nagios-plugins-ping
|
||||
tags:
|
||||
- packages
|
||||
- nagios_client
|
||||
when: ansible_distribution_major_version|int < 22
|
||||
|
||||
# install pkgs:
|
||||
- name: install nagios client pkgs
|
||||
dnf: name={{ item }} state=present
|
||||
with_items:
|
||||
- nrpe
|
||||
- nagios-plugins
|
||||
- nagios-plugins-disk
|
||||
- nagios-plugins-file_age
|
||||
- nagios-plugins-users
|
||||
- nagios-plugins-procs
|
||||
- nagios-plugins-swap
|
||||
- nagios-plugins-load
|
||||
- nagios-plugins-ping
|
||||
tags:
|
||||
- packages
|
||||
- nagios_client
|
||||
when: ansible_distribution_major_version|int > 21
|
||||
|
||||
- name: install local nrpe check scripts that are not packaged
|
||||
copy: src="scripts/{{ item }}" dest="{{ libdir }}/nagios/plugins/{{ item }}" mode=0755 owner=nagios group=nagios
|
||||
with_items:
|
||||
- check_haproxy_conns.py
|
||||
- check_haproxy_mirrorlist.py
|
||||
- check_postfix_queue
|
||||
- check_raid.py
|
||||
- check_lock
|
||||
- check_fcomm_queue
|
||||
- check_fedmsg_consumer_backlog.py
|
||||
- check_fedmsg_consumer_exceptions.py
|
||||
- check_fedmsg_producer_last_ran.py
|
||||
- check_fedmsg_producers_consumers.py
|
||||
- check_supybot_plugin
|
||||
- check_rabbitmq_size
|
||||
- check_datanommer_timesince.py
|
||||
- check_memcache_connect
|
||||
- check_readonly_fs
|
||||
- check_lock_file_age
|
||||
- check_testcloud
|
||||
- check_osbs_builds.py
|
||||
- check_osbs_api.py
|
||||
- check_ipa_replication
|
||||
when: not inventory_hostname.startswith('noc')
|
||||
tags:
|
||||
- nagios_client
|
||||
|
||||
# create dirs
|
||||
# puppet used to make /var/spool/nagios (owned by nagios.nagios) mode 750
|
||||
# and /usr/lib/nagios/plugins (owned by root) mode 755 - but we don't know WHY
|
||||
# then stuff it with plugins from the plugins dir in the nagios module
|
||||
# then we symlinked that to /usr/lib64/nagios/plugins
|
||||
# it was a nightmare - don't do that - my ghost will haunt you if you do
|
||||
# skvidal 2013-05-21
|
||||
|
||||
|
||||
# Three tasks for handling our custom selinux module
|
||||
- name: ensure a directory exists for our custom selinux module
|
||||
file: dest=/usr/share/nrpe state=directory
|
||||
|
||||
- name: copy over our custom selinux module
|
||||
copy: src=selinux/fi-nrpe.pp dest=/usr/share/nrpe/fi-nrpe.pp
|
||||
register: selinux_module
|
||||
|
||||
- name: install our custom selinux module
|
||||
command: semodule -i /usr/share/nrpe/fi-nrpe.pp
|
||||
when: ansible_distribution_major_version|int == 7 and selinux_module|changed
|
||||
|
||||
|
||||
# Set up our base config.
|
||||
- name: /etc/nagios/nrpe.cfg
|
||||
template: src=nrpe.cfg.j2 dest=/etc/nagios/nrpe.cfg
|
||||
when: not inventory_hostname.startswith('noc')
|
||||
notify:
|
||||
- restart nrpe
|
||||
tags:
|
||||
- config
|
||||
- nagios_client
|
||||
|
||||
#
|
||||
# The actual items files here end in .j2 (they are templates)
|
||||
# So when adding or modifying them change the .j2 version in git.
|
||||
#
|
||||
- name: install nrpe client configs
|
||||
template: src={{ item }}.j2 dest=/etc/nrpe.d/{{ item }}
|
||||
with_items:
|
||||
- check_mirrorlist_cache.cfg
|
||||
- check_raid.cfg
|
||||
- check_ipa.cfg
|
||||
- check_readonly_fs.cfg
|
||||
- check_cron.cfg
|
||||
- check_disk.cfg
|
||||
- check_swap.cfg
|
||||
- check_postfix_queue.cfg
|
||||
- check_lock.cfg
|
||||
- check_fedmsg_hub_proc.cfg
|
||||
- check_fedmsg_irc_proc.cfg
|
||||
- check_fedmsg_relay_proc.cfg
|
||||
- check_fedmsg_gateway_proc.cfg
|
||||
- check_fedmsg_masher_proc.cfg
|
||||
- check_redis_proc.cfg
|
||||
- check_autocloud_proc.cfg
|
||||
- check_fedmsg_consumers.cfg
|
||||
- check_supybot_fedmsg_plugin.cfg
|
||||
- check_datanommer_history.cfg
|
||||
- check_memcache.cfg
|
||||
- check_lock_file_age.cfg
|
||||
- check_basset.cfg
|
||||
- check_fmn.cfg
|
||||
- check_osbs.cfg
|
||||
- check_koschei_polling_proc.cfg
|
||||
- check_koschei_resolver_proc.cfg
|
||||
- check_koschei_scheduler_proc.cfg
|
||||
- check_koschei_watcher_proc.cfg
|
||||
- check_testcloud.cfg
|
||||
notify:
|
||||
- restart nrpe
|
||||
tags:
|
||||
- config
|
||||
- nagios_client
|
||||
|
||||
#
|
||||
# The actual items files here end in .j2 (they are templates)
|
||||
# So when adding or modifying them change the .j2 version in git.
|
||||
#
|
||||
- name: install nrpe bugyou fedmsg hubs check config
|
||||
template: src=check_fedmsg_hub_procs_bugyou.cfg.j2 dest=/etc/nrpe.d/check_fedmsg_hub_procs_bugyou.cfg
|
||||
when: inventory_hostname.startswith('bugyou01')
|
||||
notify:
|
||||
- restart nrpe
|
||||
tags:
|
||||
- nagios_client
|
||||
|
||||
#
|
||||
# The actual items files here end in .j2 (they are templates)
|
||||
# So when adding or modifying them change the .j2 version in git.
|
||||
#
|
||||
- name: install nrpe openvpn check config
|
||||
template: src=check_openvpn_link.cfg.j2 dest=/etc/nrpe.d/check_openvpn_link.cfg
|
||||
when: datacenter != 'phx2'
|
||||
notify:
|
||||
- restart nrpe
|
||||
tags:
|
||||
- nagios_client
|
||||
#
|
||||
# The actual items files here end in .j2 (they are templates)
|
||||
# So when adding or modifying them change the .j2 version in git.
|
||||
#
|
||||
- name: install nrpe unbound check config
|
||||
template: src=check_unbound_proc.cfg.j2 dest=/etc/nrpe.d/check_unbound_proc.cfg
|
||||
when: inventory_hostname.startswith('unbound')
|
||||
notify:
|
||||
- restart nrpe
|
||||
tags:
|
||||
- nagios_client
|
||||
#
|
||||
# The actual items files here end in .j2 (they are templates)
|
||||
# So when adding or modifying them change the .j2 version in git.
|
||||
#
|
||||
- name: install nrpe merged log check script on log01
|
||||
template: src=check_merged_file_age.cfg.j2 dest=/etc/nrpe.d/check_merged_file_age.cfg
|
||||
when: inventory_hostname.startswith('log0')
|
||||
notify:
|
||||
- restart nrpe
|
||||
tags:
|
||||
- nagios_client
|
||||
#
|
||||
# The actual items files here end in .j2 (they are templates)
|
||||
# So when adding or modifying them change the .j2 version in git.
|
||||
#
|
||||
- name: install nrpe check_mysql config for mariadb servers
|
||||
template: src=check_mysql.cfg.j2 dest=/etc/nrpe.d/check_mysql.cfg
|
||||
when: inventory_hostname.startswith('db03')
|
||||
notify:
|
||||
- restart nrpe
|
||||
tags:
|
||||
- nagios_client
|
||||
|
||||
- name: install nrpe checks for proxies
|
||||
template: src={{ item }}.j2 dest=/etc/nrpe.d/{{ item }}
|
||||
with_items:
|
||||
- check_happroxy_conns.cfg
|
||||
- check_happroxy_mirrorlist.cfg
|
||||
- check_varnish_proc.cfg
|
||||
when: inventory_hostname.startswith('proxy')
|
||||
notify:
|
||||
- restart nrpe
|
||||
tags:
|
||||
- nagios_client
|
||||
|
||||
- name: nrpe service start
|
||||
service: name=nrpe state=running enabled=true
|
||||
tags:
|
||||
- service
|
||||
- nagios_client
|
||||
|
||||
- name: Check if the fedmsg group exists
|
||||
shell: /usr/bin/getent group fedmsg | /usr/bin/wc -l | tr -d ' '
|
||||
register: fedmsg_exists
|
||||
check_mode: no
|
||||
changed_when: "1 != 1"
|
||||
tags:
|
||||
- nagios_client
|
||||
|
||||
- name: Add nrpe user to the fedmsg group if it exists
|
||||
user: name=nrpe groups=fedmsg append=yes
|
||||
when: fedmsg_exists.stdout == "1"
|
||||
tags:
|
||||
- nagios_client
|
|
@ -0,0 +1 @@
|
|||
command[check_autocloud_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'python' -a 'autocloud_job.py' -u root
|
4
roles/nagios_client/templates/check_basset.cfg.j2
Normal file
4
roles/nagios_client/templates/check_basset.cfg.j2
Normal file
|
@ -0,0 +1,4 @@
|
|||
command[check_mongo_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u mongodb -C mongod -c 1:1
|
||||
command[check_rabbitmq_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u rabbitmq -C beam.smp -c 1:1
|
||||
command[check_worker_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u basset-worker -C basset-worker -c 1:6
|
||||
command[check_basset_queue]={{ libdir }}/nagios/plugins/check_rabbitmq_size check_submission 10 20
|
1
roles/nagios_client/templates/check_cron.cfg.j2
Normal file
1
roles/nagios_client/templates/check_cron.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_cron]={{ libdir }}/nagios/plugins/check_procs -c 1:15 -C 'crond' -u root
|
|
@ -0,0 +1,50 @@
|
|||
# Checks on the datanommer history to make sure we're still receiving messages
|
||||
# of all types.
|
||||
#
|
||||
# The following are fedmsg/datanommer checks to be run on busgateway01.
|
||||
# They check for the time since the latest message in any particular category.
|
||||
# The first number is the seconds elapsed until we should raise a warning.
|
||||
# The second number is the seconds elapsed until we should raise an error.
|
||||
# For your reference:
|
||||
# 4 hours -> 14400
|
||||
# 1 day -> 86400
|
||||
# 3 days -> 259200
|
||||
# 1 week -> 604800
|
||||
# 3 weeks -> 1814400
|
||||
# 1 month -> 2628000
|
||||
# 3 months -> 7884000
|
||||
command[check_datanommer_buildsys]={{libdir}}/nagios/plugins/check_datanommer_timesince.py buildsys 14400 86400
|
||||
command[check_datanommer_git]={{libdir}}/nagios/plugins/check_datanommer_timesince.py git 86400 604800
|
||||
command[check_datanommer_bodhi]={{libdir}}/nagios/plugins/check_datanommer_timesince.py bodhi 86400 604800
|
||||
command[check_datanommer_wiki]={{libdir}}/nagios/plugins/check_datanommer_timesince.py wiki 259200 1814400
|
||||
command[check_datanommer_compose]={{libdir}}/nagios/plugins/check_datanommer_timesince.py compose 259200 1814400
|
||||
command[check_datanommer_meetbot]={{libdir}}/nagios/plugins/check_datanommer_timesince.py meetbot 604800 2628000
|
||||
command[check_datanommer_fas]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fas 1814400 2628000
|
||||
command[check_datanommer_pkgdb]={{libdir}}/nagios/plugins/check_datanommer_timesince.py pkgdb 1814400 2628000
|
||||
command[check_datanommer_fedoratagger]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedoratagger 2628000 7884000
|
||||
command[check_datanommer_planet]={{libdir}}/nagios/plugins/check_datanommer_timesince.py planet 2628000 7884000
|
||||
command[check_datanommer_copr]={{libdir}}/nagios/plugins/check_datanommer_timesince.py copr 21600 86400
|
||||
command[check_datanommer_trac]={{libdir}}/nagios/plugins/check_datanommer_timesince.py trac 86400 259200
|
||||
command[check_datanommer_askbot]={{libdir}}/nagios/plugins/check_datanommer_timesince.py askbot 86400 259200
|
||||
command[check_datanommer_fedbadges]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedbadges 86400 259200
|
||||
command[check_datanommer_fedocal]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedocal 7884000 23652000
|
||||
command[check_datanommer_ansible]={{libdir}}/nagios/plugins/check_datanommer_timesince.py ansible 432000 604800
|
||||
command[check_datanommer_summershum]={{libdir}}/nagios/plugins/check_datanommer_timesince.py summershum 604800 1814400
|
||||
command[check_datanommer_jenkins]={{libdir}}/nagios/plugins/check_datanommer_timesince.py jenkins 432000 604800
|
||||
command[check_datanommer_github]={{libdir}}/nagios/plugins/check_datanommer_timesince.py github 432000 604800
|
||||
command[check_datanommer_kerneltest]={{libdir}}/nagios/plugins/check_datanommer_timesince.py kerneltest 604800 1814400
|
||||
command[check_datanommer_fmn]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fmn 604800 1814400
|
||||
command[check_datanommer_anitya]={{libdir}}/nagios/plugins/check_datanommer_timesince.py anitya 604800 1814400
|
||||
command[check_datanommer_fedimg]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedimg 259200 604800
|
||||
command[check_datanommer_hotness]={{libdir}}/nagios/plugins/check_datanommer_timesince.py hotness 604800 1814400
|
||||
command[check_datanommer_faf]={{libdir}}/nagios/plugins/check_datanommer_timesince.py faf 86400 259200
|
||||
command[check_datanommer_koschei]={{libdir}}/nagios/plugins/check_datanommer_timesince.py koschei 86400 604800
|
||||
command[check_datanommer_autocloud]={{libdir}}/nagios/plugins/check_datanommer_timesince.py autocloud 259200 1814400
|
||||
command[check_datanommer_twoweekatomic]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py org.fedoraproject.prod.releng.atomic.twoweek.complete 1296000 1382400
|
||||
|
||||
# This one is retired since it times out all the time. Too few messages.
|
||||
#command[check_datanommer_nuancier]={{libdir}}/nagios/plugins/check_datanommer_timesince.py nuancier 23652000 31536000
|
||||
|
||||
# These are not actually finished and deployed yet
|
||||
command[check_datanommer_mailman]={{libdir}}/nagios/plugins/check_datanommer_timesince.py mailman 14400 86400
|
||||
command[check_datanommer_bugzilla]={{libdir}}/nagios/plugins/check_datanommer_timesince.py bugzilla 86400 259200
|
7
roles/nagios_client/templates/check_disk.cfg.j2
Normal file
7
roles/nagios_client/templates/check_disk.cfg.j2
Normal file
|
@ -0,0 +1,7 @@
|
|||
command[check_disk_/]={{ libdir }}/nagios/plugins/check_disk -w 14% -c 10% -p /
|
||||
command[check_disk_/boot]={{ libdir }}/nagios/plugins/check_disk -w 15% -c 10% -p /boot
|
||||
command[check_disk_/srv/cache/lookaside]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /srv/cache/lookaside
|
||||
command[check_disk_/srv]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /srv
|
||||
command[check_disk_/srv/buildmaster]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /srv/buildmaster
|
||||
command[check_disk_/srv/taskotron]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /srv/taskotron
|
||||
command[check_disk_/var/log]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 15% -p /var/log
|
63
roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2
Normal file
63
roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2
Normal file
|
@ -0,0 +1,63 @@
|
|||
# Fedmsg checks for consumers and producers
|
||||
command[check_fedmsg_cp_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub Nommer MonitoringProducer
|
||||
command[check_fedmsg_cp_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-gateway GatewayConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_anitya_relay]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_app]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_value]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-irc IRCBotConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub GenACLsConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_summershum]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub SummerShumConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FedoraBadgesConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FMNConsumer DigestProducer ConfirmationProducer MonitoringProducer
|
||||
command[check_fedmsg_cp_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py moksha-hub BugzillaConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_fedimg_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FedimgConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_hotness_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub BugzillaTicketFiler MonitoringProducer
|
||||
command[check_fedmsg_cp_bodhi_backend01_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub Masher MonitoringProducer
|
||||
command[check_fedmsg_cp_bodhi_backend02_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub UpdatesHandler MonitoringProducer
|
||||
command[check_fedmsg_cp_autocloud_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub AutoCloudConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_packages_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub CacheInvalidator MonitoringProducer
|
||||
command[check_fedmsg_cp_bugyou_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub BugyouConsumer MonitoringProducer
|
||||
command[check_fedmsg_cp_pdc_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub PDCUpdater MonitoringProducer
|
||||
|
||||
command[check_fedmsg_cexceptions_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub Nommer 1 10
|
||||
command[check_fedmsg_cexceptions_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-gateway GatewayConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_anitya_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_app]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_value]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-irc IRCBotConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub GenACLsConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_summershum]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub SummerShumConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FedoraBadgesConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FMNConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py moksha-hub BugzillaConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_fedimg_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FedimgConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_hotness_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub BugzillaTicketFiler 1 10
|
||||
command[check_fedmsg_cexceptions_bodhi_backend01_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub Masher 1 10
|
||||
command[check_fedmsg_cexceptions_bodhi_backend02_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub UpdatesHandler 1 10
|
||||
command[check_fedmsg_cexceptions_autocloud_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub AutoCloudConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_packages_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub CacheInvalidator 1 10
|
||||
command[check_fedmsg_cexceptions_bugyou_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub BugyouConsumer 1 10
|
||||
command[check_fedmsg_cexceptions_pdc_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub PDCUpdater 1 10
|
||||
|
||||
command[check_fedmsg_cbacklog_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub Nommer 500 1000
|
||||
command[check_fedmsg_cbacklog_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50
|
||||
command[check_fedmsg_cbacklog_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-gateway GatewayConsumer 10 50
|
||||
command[check_fedmsg_cbacklog_anitya_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50
|
||||
command[check_fedmsg_cbacklog_app]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50
|
||||
command[check_fedmsg_cbacklog_value]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-irc IRCBotConsumer 10 50
|
||||
command[check_fedmsg_cbacklog_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub GenACLsConsumer 10 50
|
||||
command[check_fedmsg_cbacklog_summershum]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub SummerShumConsumer 100 500
|
||||
command[check_fedmsg_cbacklog_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FedoraBadgesConsumer 7000 10000
|
||||
command[check_fedmsg_cbacklog_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FMNConsumer 15000 20000
|
||||
command[check_fedmsg_cbacklog_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py moksha-hub BugzillaConsumer 10 100
|
||||
command[check_fedmsg_cbacklog_fedimg_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FedimgConsumer 2000 5000
|
||||
command[check_fedmsg_cbacklog_hotness_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub BugzillaTicketFiler 1000 5000
|
||||
command[check_fedmsg_cbacklog_bodhi_backend01_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub Masher 500 1000
|
||||
command[check_fedmsg_cbacklog_bodhi_backend02_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub UpdatesHandler 500 1000
|
||||
command[check_fedmsg_cbacklog_autocloud_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub AutoCloudConsumer 100 500
|
||||
command[check_fedmsg_cbacklog_packages_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub CacheInvalidator 20000 30000
|
||||
command[check_fedmsg_cbacklog_bugyou_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub BugyouConsumer 5000 10000
|
||||
command[check_fedmsg_cbacklog_pdc_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub PDCUpdater 10000 20000
|
||||
|
||||
command[check_fedmsg_fmn_digest_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub DigestProducer 90 600
|
||||
command[check_fedmsg_fmn_confirm_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub ConfirmationProducer 90 600
|
|
@ -0,0 +1 @@
|
|||
command[check_fedmsg_gateway_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-gateway' -u fedmsg
|
|
@ -0,0 +1 @@
|
|||
command[check_fedmsg_hub_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-hub' -u fedmsg
|
|
@ -0,0 +1 @@
|
|||
command[check_fedmsg_hub_procs_bugyou]={{ libdir }}/nagios/plugins/check_procs -c 3:3 -C 'fedmsg-hub' -u fedmsg
|
|
@ -0,0 +1 @@
|
|||
command[check_fedmsg_irc_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-irc' -u fedmsg
|
|
@ -0,0 +1 @@
|
|||
command[check_fedmsg_masher_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-hub' -u apache
|
|
@ -0,0 +1 @@
|
|||
command[check_fedmsg_relay_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-relay' -u fedmsg
|
2
roles/nagios_client/templates/check_fmn.cfg.j2
Normal file
2
roles/nagios_client/templates/check_fmn.cfg.j2
Normal file
|
@ -0,0 +1,2 @@
|
|||
command[check_fmn_worker_queue]={{ libdir }}/nagios/plugins/check_rabbitmq_size workers 200 1000
|
||||
command[check_fmn_backend_queue]={{ libdir }}/nagios/plugins/check_rabbitmq_size backends 100 200
|
|
@ -0,0 +1 @@
|
|||
command[check_haproxy_conns]=/usr/lib64/nagios/plugins/check_haproxy_conns.py
|
|
@ -0,0 +1 @@
|
|||
command[check_haproxy_mirrorlist]=/usr/lib64/nagios/plugins/check_haproxy_mirrorlist.py
|
1
roles/nagios_client/templates/check_ipa.cfg.j2
Normal file
1
roles/nagios_client/templates/check_ipa.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_ipa_replication]={{ libdir }}/nagios/plugins/check_ipa_replication -u ldaps://localhost/
|
|
@ -0,0 +1 @@
|
|||
command[check_koschei_polling_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-polling -c 1:1
|
|
@ -0,0 +1 @@
|
|||
command[check_koschei_resolver_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-resolve -c 1:1
|
|
@ -0,0 +1 @@
|
|||
command[check_koschei_scheduler_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-schedul -c 1:1
|
|
@ -0,0 +1 @@
|
|||
command[check_koschei_watcher_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-watcher -c 1:1
|
1
roles/nagios_client/templates/check_lock.cfg.j2
Normal file
1
roles/nagios_client/templates/check_lock.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_lock]={{ libdir }}/nagios/plugins/check_lock
|
1
roles/nagios_client/templates/check_lock_file_age.cfg.j2
Normal file
1
roles/nagios_client/templates/check_lock_file_age.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_lock_file_age]={{ libdir }}/nagios/plugins/check_lock_file_age -w 1 -c 5 -f /var/lock/fedora-ca/lock
|
2
roles/nagios_client/templates/check_memcache.cfg.j2
Normal file
2
roles/nagios_client/templates/check_memcache.cfg.j2
Normal file
|
@ -0,0 +1,2 @@
|
|||
command[check_memcache]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -a '/usr/bin/memcached' -u memcached
|
||||
command[check_memcache_connect]=/usr/lib64/nagios/plugins/check_memcache_connect
|
|
@ -0,0 +1 @@
|
|||
command[check_merged_file_age]=/usr/lib64/nagios/plugins/check_file_age -w 120 -c 300 /var/log/merged/messages.log
|
|
@ -0,0 +1 @@
|
|||
command[check_mirrorlist_cache]={{ libdir }}/nagios/plugins/check_file_age -w 14400 -c 129600 -f /var/lib/mirrormanager/mirrorlist_cache.pkl
|
1
roles/nagios_client/templates/check_mysql.cfg.j2
Normal file
1
roles/nagios_client/templates/check_mysql.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_mysql_backup]={{ libdir }}/nagios/plugins/check_file_age -w 86400 -c 129600 -f /backups/fpo-mediawiki-latest.xz
|
1
roles/nagios_client/templates/check_openvpn_link.cfg.j2
Normal file
1
roles/nagios_client/templates/check_openvpn_link.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_openvpn_link]={{ libdir }}/nagios/plugins/check_ping -H 192.168.1.41 -w 375.0,20% -c 500,60%
|
2
roles/nagios_client/templates/check_osbs.cfg.j2
Normal file
2
roles/nagios_client/templates/check_osbs.cfg.j2
Normal file
|
@ -0,0 +1,2 @@
|
|||
command[check_osbs_builds]={{ libdir }}/nagios/plugins/check_osbs_builds.py
|
||||
command[check_osbs_api]={{ libdir }}/nagios/plugins/check_osbs_api.py
|
1
roles/nagios_client/templates/check_postfix_queue.cfg.j2
Normal file
1
roles/nagios_client/templates/check_postfix_queue.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_postfix_queue]={{ libdir }}/nagios/plugins/check_postfix_queue -w {{ nrpe_check_postfix_queue_warn }} -c {{ nrpe_check_postfix_queue_crit }}
|
1
roles/nagios_client/templates/check_raid.cfg.j2
Normal file
1
roles/nagios_client/templates/check_raid.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_raid]={{ libdir }}/nagios/plugins/check_raid.py
|
1
roles/nagios_client/templates/check_readonly_fs.cfg.j2
Normal file
1
roles/nagios_client/templates/check_readonly_fs.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_readonly_fs]=/usr/lib64/nagios/plugins/check_readonly_fs
|
1
roles/nagios_client/templates/check_redis_proc.cfg.j2
Normal file
1
roles/nagios_client/templates/check_redis_proc.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_redis_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'redis-server' -u redis
|
|
@ -0,0 +1 @@
|
|||
command[check_supybot_fedmsg_plugin]={{libdir}}/nagios/plugins/check_supybot_plugin -t fedmsg
|
1
roles/nagios_client/templates/check_swap.cfg.j2
Normal file
1
roles/nagios_client/templates/check_swap.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_swap]={{ libdir }}/nagios/plugins/check_swap -w 15% -c 10%
|
1
roles/nagios_client/templates/check_testcloud.cfg.j2
Normal file
1
roles/nagios_client/templates/check_testcloud.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_testcloud]={{ libdir }}/nagios/plugins/check_testcloud
|
1
roles/nagios_client/templates/check_unbound_proc.cfg.j2
Normal file
1
roles/nagios_client/templates/check_unbound_proc.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_unbound_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'unbound' -u unbound
|
1
roles/nagios_client/templates/check_varnish_proc.cfg.j2
Normal file
1
roles/nagios_client/templates/check_varnish_proc.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
|||
command[check_varnish_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:2 -C 'varnishd' -u varnish
|
228
roles/nagios_client/templates/nrpe.cfg.j2
Normal file
228
roles/nagios_client/templates/nrpe.cfg.j2
Normal file
|
@ -0,0 +1,228 @@
|
|||
#############################################################################
|
||||
# Sample NRPE Config File
|
||||
# Written by: Ethan Galstad (nagios@nagios.org)
|
||||
#
|
||||
# Last Modified: 11-23-2007
|
||||
#
|
||||
# NOTES:
|
||||
# This is a sample configuration file for the NRPE daemon. It needs to be
|
||||
# located on the remote host that is running the NRPE daemon, not the host
|
||||
# from which the check_nrpe client is being executed.
|
||||
#############################################################################
|
||||
|
||||
|
||||
# LOG FACILITY
|
||||
# The syslog facility that should be used for logging purposes.
|
||||
|
||||
log_facility=daemon
|
||||
|
||||
|
||||
|
||||
# PID FILE
|
||||
# The name of the file in which the NRPE daemon should write its process ID
|
||||
# number. The file is only written if the NRPE daemon is started by the root
|
||||
# user and is running in standalone mode.
|
||||
|
||||
pid_file=/var/run/nrpe/nrpe.pid
|
||||
|
||||
|
||||
|
||||
# PORT NUMBER
|
||||
# Port number we should wait for connections on.
|
||||
# NOTE: This must be a non-privileged port (i.e. > 1024).
|
||||
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||
|
||||
server_port=5666
|
||||
|
||||
|
||||
|
||||
# SERVER ADDRESS
|
||||
# Address that nrpe should bind to in case there is more than one interface
|
||||
# and you do not want nrpe to bind on all interfaces.
|
||||
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||
|
||||
#server_address=127.0.0.1
|
||||
|
||||
|
||||
|
||||
# NRPE USER
|
||||
# This determines the effective user that the NRPE daemon should run as.
|
||||
# You can either supply a username or a UID.
|
||||
#
|
||||
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||
|
||||
nrpe_user=nrpe
|
||||
|
||||
|
||||
|
||||
# NRPE GROUP
|
||||
# This determines the effective group that the NRPE daemon should run as.
|
||||
# You can either supply a group name or a GID.
|
||||
#
|
||||
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||
|
||||
nrpe_group=nrpe
|
||||
|
||||
|
||||
|
||||
# ALLOWED HOST ADDRESSES
|
||||
# This is an optional comma-delimited list of IP addresses or hostnames
|
||||
# that are allowed to talk to the NRPE daemon. Network addresses with a bit mask
|
||||
# (i.e. 192.168.1.0/24) are also supported. Hostname wildcards are not currently
|
||||
# supported.
|
||||
#
|
||||
# Note: The daemon only does rudimentary checking of the client's IP
|
||||
# address. I would highly recommend adding entries in your /etc/hosts.allow
|
||||
# file to allow only the specified host to connect to the port
|
||||
# you are running this daemon on.
|
||||
#
|
||||
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||
|
||||
|
||||
allowed_hosts=10.5.126.41,192.168.1.10,192.168.1.20,209.132.181.35
|
||||
|
||||
|
||||
|
||||
# COMMAND ARGUMENT PROCESSING
|
||||
# This option determines whether or not the NRPE daemon will allow clients
|
||||
# to specify arguments to commands that are executed. This option only works
|
||||
# if the daemon was configured with the --enable-command-args configure script
|
||||
# option.
|
||||
#
|
||||
# *** ENABLING THIS OPTION IS A SECURITY RISK! ***
|
||||
# Read the SECURITY file for information on some of the security implications
|
||||
# of enabling this variable.
|
||||
#
|
||||
# Values: 0=do not allow arguments, 1=allow command arguments
|
||||
|
||||
dont_blame_nrpe=0
|
||||
|
||||
|
||||
|
||||
# COMMAND PREFIX
|
||||
# This option allows you to prefix all commands with a user-defined string.
|
||||
# A space is automatically added between the specified prefix string and the
|
||||
# command line from the command definition.
|
||||
#
|
||||
# *** THIS EXAMPLE MAY POSE A POTENTIAL SECURITY RISK, SO USE WITH CAUTION! ***
|
||||
# Usage scenario:
|
||||
# Execute restricted commands using sudo. For this to work, you need to add
|
||||
# the nagios user to your /etc/sudoers. An example entry for allowing
|
||||
# execution of the plugins might be:
|
||||
#
|
||||
# nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/
|
||||
#
|
||||
# This lets the nagios user run all commands in that directory (and only them)
|
||||
# without asking for a password. If you do this, make sure you don't give
|
||||
# random users write access to that directory or its contents!
|
||||
|
||||
# command_prefix=/usr/bin/sudo
|
||||
|
||||
|
||||
|
||||
# DEBUGGING OPTION
|
||||
# This option determines whether or not debugging messages are logged to the
|
||||
# syslog facility.
|
||||
# Values: 0=debugging off, 1=debugging on
|
||||
|
||||
debug=0
|
||||
|
||||
|
||||
|
||||
# COMMAND TIMEOUT
|
||||
# This specifies the maximum number of seconds that the NRPE daemon will
|
||||
# allow plugins to finish executing before killing them off.
|
||||
|
||||
command_timeout=100
|
||||
|
||||
|
||||
|
||||
# CONNECTION TIMEOUT
|
||||
# This specifies the maximum number of seconds that the NRPE daemon will
|
||||
# wait for a connection to be established before exiting. This is sometimes
|
||||
# seen where a network problem stops the SSL being established even though
|
||||
# all network sessions are connected. This causes the nrpe daemons to
|
||||
# accumulate, eating system resources. Do not set this too low.
|
||||
|
||||
connection_timeout=300
|
||||
|
||||
|
||||
|
||||
# WEAK RANDOM SEED OPTION
|
||||
# This directive allows you to use SSL even if your system does not have
|
||||
# a /dev/random or /dev/urandom (on purpose or because the necessary patches
|
||||
# were not applied). The random number generator will be seeded from a file
|
||||
# which is either a file pointed to by the environment variable $RANDFILE
|
||||
# or $HOME/.rnd. If neither exists, the pseudo random number generator will
|
||||
# be initialized and a warning will be issued.
|
||||
# Values: 0=only seed from /dev/[u]random, 1=also seed from weak randomness
|
||||
|
||||
#allow_weak_random_seed=1
|
||||
|
||||
|
||||
|
||||
# INCLUDE CONFIG FILE
|
||||
# This directive allows you to include definitions from an external config file.
|
||||
|
||||
#include=<somefile.cfg>
|
||||
|
||||
|
||||
|
||||
# INCLUDE CONFIG DIRECTORY
|
||||
# This directive allows you to include definitions from config files (with a
|
||||
# .cfg extension) in one or more directories (with recursion).
|
||||
|
||||
include_dir=/etc/nrpe.d/
|
||||
|
||||
|
||||
|
||||
# COMMAND DEFINITIONS
|
||||
# Command definitions that this daemon will run. Definitions
|
||||
# are in the following format:
|
||||
#
|
||||
# command[<command_name>]=<command_line>
|
||||
#
|
||||
# When the daemon receives a request to return the results of <command_name>
|
||||
# it will execute the command specified by the <command_line> argument.
|
||||
#
|
||||
# Unlike Nagios, the command line cannot contain macros - it must be
|
||||
# typed exactly as it should be executed.
|
||||
#
|
||||
# Note: Any plugins that are used in the command lines must reside
|
||||
# on the machine that this daemon is running on! The examples below
|
||||
# assume that you have plugins installed in a /usr/local/nagios/libexec
|
||||
# directory. Also note that you will have to modify the definitions below
|
||||
# to match the argument format the plugins expect. Remember, these are
|
||||
# examples only!
|
||||
|
||||
|
||||
# The following examples use hardcoded command arguments...
|
||||
|
||||
command[check_users]={{ libdir }}/nagios/plugins/check_users -w 5 -c 10
|
||||
command[check_load]={{ libdir }}/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
|
||||
command[check_hda1]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
|
||||
{% if inventory_hostname not in groups['zombie-infested'] %}
|
||||
command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 5 -c 10 -s Z
|
||||
{% else %}
|
||||
# This host is prone to Zombies and we do not care or want to alert on it so we make the limits very high
|
||||
command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 50000 -c 100000 -s Z
|
||||
{% endif %}
|
||||
command[check_total_procs]={{ libdir }}/nagios/plugins/check_procs -w {{ nrpe_procs_warn }} -c {{ nrpe_procs_crit }}
|
||||
|
||||
|
||||
# The following examples allow user-supplied arguments and can
|
||||
# only be used if the NRPE daemon was compiled with support for
|
||||
# command arguments *AND* the dont_blame_nrpe directive in this
|
||||
# config file is set to '1'. This poses a potential security risk, so
|
||||
# make sure you read the SECURITY file before doing this.
|
||||
|
||||
#command[check_users]=/usr/lib64/nagios/plugins/check_users -w $ARG1$ -c $ARG2$
|
||||
#command[check_load]=/usr/lib64/nagios/plugins/check_load -w $ARG1$ -c $ARG2$
|
||||
#command[check_disk]=/usr/lib64/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
|
||||
#command[check_procs]=/usr/lib64/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
|
||||
|
||||
|
||||
# NEVER ADD ANYTHING HERE - ANY ENTRIES TO NRPE SHOULD BE in .cfg files in /etc/nrpe.d/
|
||||
|
||||
# NEVER NEVER NEVER
|
||||
#
|
78
roles/nagios_server/README.rst
Normal file
78
roles/nagios_server/README.rst
Normal file
|
@ -0,0 +1,78 @@
|
|||
===================================
|
||||
Nagios 4 Configuration for Fedora
|
||||
===================================
|
||||
|
||||
The Fedora Infrastructure Nagios is built on a set of configurations
|
||||
originally written for Nagios 2 and then upgraded over time to Nagios
|
||||
3 and then 4.08. With additional changes made in the 4.2 series of
|
||||
Nagios this needed a better rewrite as various parts came from
|
||||
pre-puppet and then various puppet modules added on top.
|
||||
|
||||
In order to get this rewrite done, we will use as much of the original
|
||||
layout of the Fedora ansible nagios module but with rewrites to better
|
||||
match current Nagios configurations so that it can be maintained.
|
||||
|
||||
Role directory layout
|
||||
=====================
|
||||
The original layout branched out from
|
||||
|
||||
roles/nagios/client/
|
||||
roles/nagios/server/
|
||||
|
||||
With the usual trees below this. This breaks ansible best practices
|
||||
and how most new modules are set up so the rewrite uses:
|
||||
|
||||
roles/nagios_client/
|
||||
roles/nagios_server/
|
||||
|
||||
=====================
|
||||
Nagios Server Files
|
||||
=====================
|
||||
|
||||
The Nagios Server Files require a large layout change. The original
|
||||
Nagios system used multiple independent modes and files which caused
|
||||
problems when hosts were removed. The new system will use hosts set up
|
||||
from the Fedora Ansible Inventory with hostgroups set up to match
|
||||
groups.
|
||||
|
||||
roles/nagios_server/{files,handlers,tasks,templates}
|
||||
|
||||
r.../n.../files/httpd ==> /etc/httpd/conf.d files
|
||||
r.../n.../files/nagios ==> /etc/nagios/ files
|
||||
r.../n.../files/nagios/commands command files
|
||||
r.../n.../files/nagios/hosts host files
|
||||
r.../n.../files/nagios/hostgroups groups made from hosts
|
||||
r.../n.../files/nagios/services services
|
||||
r.../n.../files/nagios/servicegroups groups made from services
|
||||
r.../n.../files/nagios/contacts files for people
|
||||
r.../n.../files/nagios/contactgroups groups made from contacts
|
||||
|
||||
similar layout for templates
|
||||
handlers has the ways to restart and check configuration
|
||||
tasks has the main rules for building stuff.
|
||||
|
||||
===================
|
||||
Nagios Module Steps
|
||||
===================
|
||||
|
||||
1. Check to see if the nagios user is configured. Someone years ago
|
||||
chose that our monitoring uses UID/GID 420. Har Har.
|
||||
Setup any other groups and permissions
|
||||
2. Install the needed packages for the server.
|
||||
3. Set up the directories on the server
|
||||
/etc/nagios/{child}
|
||||
4. Synchronise over the static files
|
||||
/etc/nagios/commands/
|
||||
/etc/nagios/services/
|
||||
/etc/nagios/servicegroups/
|
||||
/etc/nagios/contacts/
|
||||
/etc/nagios/contactgroups/
|
||||
/usr/lib64/nagios/plugins/
|
||||
/usr/local/bin
|
||||
/usr/share/nagios/html/
|
||||
5. Build template files
|
||||
/etc/nagios/commands/
|
||||
/etc/nagios/hosts/{ansible-inventory, ansible-vars, other}
|
||||
/etc/nagios/hostgroups/
|
||||
6. Fix selinux policy
|
||||
7. Restart services
|
36
roles/nagios_server/files/httpd/nagios.conf
Normal file
36
roles/nagios_server/files/httpd/nagios.conf
Normal file
|
@ -0,0 +1,36 @@
|
|||
# noc1
|
||||
ScriptAlias /nagios/cgi-bin/ /usr/lib64/nagios/cgi-bin/
|
||||
|
||||
# noc2
|
||||
ScriptAlias /nagios-external/cgi-bin/ /usr/lib64/nagios/cgi-bin/
|
||||
|
||||
# test
|
||||
ScriptAlias /nagios-just-a-test/cgi-bin/ /usr/lib64/nagios/cgi-bin/
|
||||
|
||||
ScriptAlias /tac.cgi /usr/lib64/nagios/cgi-bin/tac.cgi
|
||||
|
||||
<Location />
|
||||
AuthName "Nagios GSSAPI Login"
|
||||
GssapiCredStore keytab:/etc/krb5.HTTP_admin.fedoraproject.org.keytab
|
||||
AuthType GSSAPI
|
||||
# This is off because Apache (and thus mod_auth_gssapi) doesn't know this is proxied over TLS
|
||||
GssapiSSLonly Off
|
||||
GssapiLocalName on
|
||||
Require valid-user
|
||||
</Location>
|
||||
|
||||
<Location ~ "/(nagios|nagios-external|nagios-just-a-test)/cgi-bin/">
|
||||
Options ExecCGI
|
||||
</Location>
|
||||
|
||||
<Directory "/usr/share/nagios/html">
|
||||
Options None
|
||||
</Directory>
|
||||
|
||||
Alias /nagios /usr/share/nagios/html/
|
||||
|
||||
# This will only affect noc2 because the proxies only forward -external to it.
|
||||
Alias /nagios-external /usr/share/nagios/html/
|
||||
|
||||
# Test
|
||||
Alias /nagios-test /usr/share/nagios/html/
|
8
roles/nagios_server/files/nagios/commands/bzr.cfg
Normal file
8
roles/nagios_server/files/nagios/commands/bzr.cfg
Normal file
|
@ -0,0 +1,8 @@
|
|||
# 'check_bzr' command definition
|
||||
# I'd like this to actually interact with BZR, but I can't find any
|
||||
# proper documentation on the protocol to craft send/expect/quit
|
||||
# strings.
|
||||
define command{
|
||||
command_name check_bzr
|
||||
command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p 4155
|
||||
}
|
15
roles/nagios_server/files/nagios/commands/disk.cfg
Normal file
15
roles/nagios_server/files/nagios/commands/disk.cfg
Normal file
|
@ -0,0 +1,15 @@
|
|||
define command {
|
||||
command_name check_by_ssh_check_raid
|
||||
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_raid.py"
|
||||
}
|
||||
|
||||
define command {
|
||||
command_name check_by_ssh_check_disk
|
||||
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_disk -w $ARG1$% -c $ARG2$% -p $ARG3$"
|
||||
}
|
||||
|
||||
# 'check_postgres_conns' command definition
|
||||
define command{
|
||||
command_name check_postgres_conns
|
||||
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_procs -u postgres -w $ARG1$ -c $ARG2$ -a $ARG3$"
|
||||
}
|
11
roles/nagios_server/files/nagios/commands/dns.cfg
Normal file
11
roles/nagios_server/files/nagios/commands/dns.cfg
Normal file
|
@ -0,0 +1,11 @@
|
|||
# 'check_dns' command definition
|
||||
define command{
|
||||
command_name check_dns
|
||||
command_line $USER1$/check_dns -H www.yahoo.com -s $HOSTADDRESS$
|
||||
}
|
||||
|
||||
# 'check_dns_fpo' command definition
|
||||
define command{
|
||||
command_name check_dns_fpo
|
||||
command_line $USER1$/check_dns -t 30 -H fedoraproject.org -A -s $HOSTADDRESS$
|
||||
}
|
8
roles/nagios_server/files/nagios/commands/git.cfg
Normal file
8
roles/nagios_server/files/nagios/commands/git.cfg
Normal file
|
@ -0,0 +1,8 @@
|
|||
# 'check_git' command definition
|
||||
# I'd like this to actually interact with GIT, but I can't find any
|
||||
# proper documentation on the protocol to craft send/expect/quit
|
||||
# strings.
|
||||
define command{
|
||||
command_name check_git
|
||||
command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p 9418
|
||||
}
|
79
roles/nagios_server/files/nagios/commands/httpd.cfg
Normal file
79
roles/nagios_server/files/nagios/commands/httpd.cfg
Normal file
|
@ -0,0 +1,79 @@
|
|||
##
|
||||
## This file has the commands to check and restart general httpd services
|
||||
## and websites.
|
||||
##
|
||||
|
||||
################################################################################
|
||||
# COMMAND DEFINITIONS
|
||||
#
|
||||
# SYNTAX:
|
||||
#
|
||||
# define command{
|
||||
# template <templatename>
|
||||
# name <objectname>
|
||||
# command_name <commandname>
|
||||
# command_line <commandline>
|
||||
# }
|
||||
#
|
||||
# WHERE:
|
||||
#
|
||||
# <templatename> = object name of another command definition that should be
|
||||
# used as a template for this definition (optional)
|
||||
# <objectname> = object name of command definition, referenced by other
|
||||
# command definitions that use it as a template (optional)
|
||||
# <commandname> = name of the command, as recognized/used by Nagios
|
||||
# <commandline> = command line
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# 'restart_httpd' command definition
|
||||
define command {
|
||||
command_name restart_httpd
|
||||
command_line $USER1$/restart_httpd $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$ $HOSTADDRESS$ "$HOSTALIAS$" "$SERVICEDESC$" "$SERVICESTATE$"
|
||||
}
|
||||
|
||||
|
||||
#
|
||||
# 'check_website_publiclist' command definition
|
||||
define command{
|
||||
command_name check_website_publiclist
|
||||
command_line $USER1$/check_http -w 60 -c 80 -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
|
||||
}
|
||||
|
||||
# 'check_website' command definition
|
||||
define command{
|
||||
command_name check_website
|
||||
command_line $USER1$/check_http -w 30 -c 40 -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check_website_ppc
|
||||
command_line $USER1$/check_http -w 300 -c 400 -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check_website_ssl
|
||||
command_line $USER1$/check_http -w 30 -c 40 --ssl -I $HOSTADDRESS$ -H $ARG1$ -u $ARG2$ -s "$ARG3$"
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check_ssl_cert
|
||||
command_line $USER1$/check_http -I $HOSTADDRESS$ -H $ARG1$ -C $ARG2$
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check_website_publiclist_ssl
|
||||
command_line $USER1$/check_http -w 40 -c 60 --ssl -I $HOSTADDRESS$ -H $ARG1$ -u $ARG2$ -s "$ARG3$"
|
||||
}
|
||||
|
||||
# 'check_http' command definition
|
||||
define command{
|
||||
command_name check_http
|
||||
command_line $USER1$/check_http -H $HOSTADDRESS$
|
||||
}
|
||||
|
||||
# 'check_https' command definition
|
||||
define command{
|
||||
command_name check_https
|
||||
command_line $USER1$/check_http -H $HOSTADDRESS$ --ssl
|
||||
}
|
29
roles/nagios_server/files/nagios/commands/koji.cfg
Normal file
29
roles/nagios_server/files/nagios/commands/koji.cfg
Normal file
|
@ -0,0 +1,29 @@
|
|||
################################################################################
|
||||
# COMMAND DEFINITIONS
|
||||
#
|
||||
# SYNTAX:
|
||||
#
|
||||
# define command{
|
||||
# template <templatename>
|
||||
# name <objectname>
|
||||
# command_name <commandname>
|
||||
# command_line <commandline>
|
||||
# }
|
||||
#
|
||||
# WHERE:
|
||||
#
|
||||
# <templatename> = object name of another command definition that should be
|
||||
# used as a template for this definition (optional)
|
||||
# <objectname> = object name of command definition, referenced by other
|
||||
# command definitions that use it as a template (optional)
|
||||
# <commandname> = name of the command, as recognized/used by Nagios
|
||||
# <commandline> = command line
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# 'check_koji'
|
||||
define command{
|
||||
command_name check_koji
|
||||
command_line $USER1$/check_koji
|
||||
}
|
||||
|
36
roles/nagios_server/files/nagios/commands/local.cfg
Normal file
36
roles/nagios_server/files/nagios/commands/local.cfg
Normal file
|
@ -0,0 +1,36 @@
|
|||
# 'check_local_disk' command definition
|
||||
define command{
|
||||
command_name check_local_disk
|
||||
command_line $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
|
||||
}
|
||||
|
||||
# 'check_local_load' command definition
|
||||
define command{
|
||||
command_name check_local_load
|
||||
command_line $USER1$/check_load -w $ARG1$ -c $ARG2$
|
||||
}
|
||||
|
||||
# 'check_local_procs' command definition
|
||||
define command{
|
||||
command_name check_local_procs
|
||||
command_line $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
|
||||
}
|
||||
|
||||
# 'check_local_users' command definition
|
||||
define command{
|
||||
command_name check_local_users
|
||||
command_line $USER1$/check_users -w $ARG1$ -c $ARG2$
|
||||
}
|
||||
|
||||
# 'check_local_swap' command definition
|
||||
define command{
|
||||
command_name check_local_swap
|
||||
command_line $USER1$/check_swap -w $ARG1$ -c $ARG2$
|
||||
}
|
||||
|
||||
# 'check_local_mrtgtraf' command definition
|
||||
define command{
|
||||
command_name check_local_mrtgtraf
|
||||
command_line $USER1$/check_mrtgtraf -F $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$
|
||||
}
|
||||
|
96
roles/nagios_server/files/nagios/commands/misc.cfg
Normal file
96
roles/nagios_server/files/nagios/commands/misc.cfg
Normal file
|
@ -0,0 +1,96 @@
|
|||
################################################################################
|
||||
# COMMAND DEFINITIONS
|
||||
#
|
||||
# SYNTAX:
|
||||
#
|
||||
# define command{
|
||||
# template <templatename>
|
||||
# name <objectname>
|
||||
# command_name <commandname>
|
||||
# command_line <commandline>
|
||||
# }
|
||||
#
|
||||
# WHERE:
|
||||
#
|
||||
# <templatename> = object name of another command definition that should be
|
||||
# used as a template for this definition (optional)
|
||||
# <objectname> = object name of command definition, referenced by other
|
||||
# command definitions that use it as a template (optional)
|
||||
# <commandname> = name of the command, as recognized/used by Nagios
|
||||
# <commandline> = command line
|
||||
#
|
||||
################################################################################
|
||||
|
||||
define command{
|
||||
command_name true
|
||||
command_line /bin/true
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check_dummy
|
||||
command_line $USER1$/check_dummy $ARG1$ $ARG2$
|
||||
}
|
||||
|
||||
# 'check_tape'
|
||||
define command{
|
||||
command_name check_tape
|
||||
command_line $USER1$/check_tape
|
||||
}
|
||||
|
||||
# 'check_ftp' command definition
|
||||
define command{
|
||||
command_name check_ftp
|
||||
command_line $USER1$/check_ftp -H $HOSTADDRESS$
|
||||
}
|
||||
|
||||
|
||||
# 'check_hpjd' command definition
|
||||
define command{
|
||||
command_name check_hpjd
|
||||
command_line $USER1$/check_hpjd -H $HOSTADDRESS$ -C public
|
||||
}
|
||||
|
||||
# 'check_snmp' command definition
|
||||
define command{
|
||||
command_name check_snmp
|
||||
command_line $USER1$/check_snmp -H $HOSTADDRESS$ $ARG1$
|
||||
}
|
||||
|
||||
|
||||
# 'check_nntp' command definition
|
||||
define command{
|
||||
command_name check_nntp
|
||||
command_line $USER1$/check_nntp -H $HOSTADDRESS$
|
||||
}
|
||||
|
||||
|
||||
# 'check_telnet' command definition
|
||||
define command{
|
||||
command_name check_telnet
|
||||
command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p 23
|
||||
}
|
||||
|
||||
# 'check_dhcp' command definition
|
||||
define command{
|
||||
command_name check_dhcp
|
||||
command_line $USER1$/check_dhcp $ARG1$
|
||||
}
|
||||
|
||||
# 'check_pop' command definition
|
||||
define command{
|
||||
command_name check_pop
|
||||
command_line $USER1$/check_pop -H $HOSTADDRESS$
|
||||
}
|
||||
|
||||
# 'check_imap' command definition
|
||||
define command{
|
||||
command_name check_imap
|
||||
command_line $USER1$/check_imap -H $HOSTADDRESS$ $ARG1$
|
||||
}
|
||||
|
||||
# 'check_nt' command definition
|
||||
define command{
|
||||
command_name check_nt
|
||||
command_line $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$
|
||||
}
|
||||
|
87
roles/nagios_server/files/nagios/commands/notify.cfg
Normal file
87
roles/nagios_server/files/nagios/commands/notify.cfg
Normal file
|
@ -0,0 +1,87 @@
|
|||
################################################################################
|
||||
#
|
||||
# SAMPLE NOTIFICATION COMMANDS
|
||||
#
|
||||
# These are some example notification commands. They may or may not work on
|
||||
# your system without modification. As an example, some systems will require
|
||||
# you to use "/usr/bin/mailx" instead of "/usr/bin/mail" in the commands below.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# 'host-notify-by-email' command definition
|
||||
define command{
|
||||
command_name host-notify-by-email
|
||||
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\nSource: $$(hostname)\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$
|
||||
}
|
||||
|
||||
# 'notify-service-by-email' command definition
|
||||
define command{
|
||||
command_name notify-service-by-email
|
||||
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /usr/bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
|
||||
}
|
||||
|
||||
# 'notify-by-epager' command definition
|
||||
define command{
|
||||
command_name notify-by-epager
|
||||
command_line /usr/bin/printf "%b" "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nInfo: $SERVICEOUTPUT$\nSource: $$(hostname -s)\nDate: $LONGDATETIME$" | /bin/mail -s "$NOTIFICATIONTYPE$: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$" $CONTACTPAGER$
|
||||
}
|
||||
|
||||
|
||||
# 'host-notify-by-epager' command definition
|
||||
define command{
|
||||
command_name host-notify-by-epager
|
||||
command_line /usr/bin/printf "%b" "Host '$HOSTALIAS$' is $HOSTSTATE$\nInfo: $HOSTOUTPUT$\nSource: $$(hostname -s)\nTime: $LONGDATETIME$" | /bin/mail -s "$NOTIFICATIONTYPE$ alert - Host $HOSTNAME$ is $HOSTSTATE$" $CONTACTPAGER$
|
||||
}
|
||||
|
||||
# 'host-notify-by-ircbot' command definition
|
||||
define command{
|
||||
command_name host-notify-by-ircbot
|
||||
command_line /usr/bin/printf "%b" "#fedora-noc $NOTIFICATIONTYPE$ - $HOSTALIAS$ is $HOSTSTATE$: $HOSTOUTPUT$ ($$(hostname -s)) $HOSTACKAUTHOR$ $SERVICEACKAUTHOR$" | /usr/local/bin/irc-colorize.py | nc -w 1 value01 5050
|
||||
}
|
||||
|
||||
# 'notify-by-email' command definition
|
||||
define command{
|
||||
command_name notify-by-email
|
||||
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\nSource: $$(hostname)\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
|
||||
}
|
||||
|
||||
# 'notify-by-ircbot' command definition
|
||||
define command{
|
||||
command_name notify-by-ircbot
|
||||
command_line /usr/bin/printf "%b" "#fedora-noc $NOTIFICATIONTYPE$ - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$: $SERVICEOUTPUT$ ($$(hostname -s)) $HOSTACKAUTHOR$ $SERVICEACKAUTHOR$" | /usr/local/bin/irc-colorize.py | nc -w 1 value01 5050
|
||||
}
|
||||
|
||||
# 'host-notify-by-fedmsg' command definition
|
||||
define command{
|
||||
command_name host-notify-by-fedmsg
|
||||
command_line /usr/bin/echo '{"type": "$NOTIFICATIONTYPE$", "host": "$HOSTALIAS$", "state": "$HOSTSTATE$", "output": "$HOSTOUTPUT$", "host_ack_author": "$HOSTACKAUTHOR$", "service_ack_author": "$SERVICEACKAUTHOR$"}' | fedmsg-logger --cert-prefix nagios --modname nagios --topic host.state.change --json-input
|
||||
}
|
||||
|
||||
# 'notify-by-epager' command definition
|
||||
define command{
|
||||
command_name notify-by-epager
|
||||
command_line /usr/bin/printf "%b" "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nInfo: $SERVICEOUTPUT$\nSource: $$(hostname -s)\nDate: $LONGDATETIME$" | /bin/mail -s "$NOTIFICATIONTYPE$: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$" $CONTACTPAGER$
|
||||
}
|
||||
|
||||
|
||||
# 'notify-by-fedmsg' command definition
|
||||
define command{
|
||||
command_name notify-by-fedmsg
|
||||
command_line /usr/bin/echo '{"type": "$NOTIFICATIONTYPE$", "host": "$HOSTALIAS$", "state": "$SERVICESTATE$", "service": "$SERVICEDESC$", "output": "$SERVICEOUTPUT$", "host_ack_author": "$HOSTACKAUTHOR$", "service_ack_author": "$SERVICEACKAUTHOR$"}' | fedmsg-logger --cert-prefix nagios --modname nagios --topic service.state.change --json-input
|
||||
}
|
||||
|
||||
# 'notify-by-xmpp' command definition
|
||||
define command{
|
||||
command_name notify-by-xmpp
|
||||
command_line /usr/local/bin/xmppsend -a /etc/nagios/private/xmppnagios.ini "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nInfo: $SERVICEOUTPUT$\nDate: $LONGDATETIME$" $CONTACTEMAIL$
|
||||
}
|
||||
|
||||
|
||||
# 'host-notify-by-xmpp' command definition
|
||||
define command{
|
||||
command_name host-notify-by-xmpp
|
||||
command_line /usr/local/bin/xmppsend -a /etc/nagios/private/xmppnagios.ini "Host '$HOSTALIAS$' is $HOSTSTATE$\nInfo: $HOSTOUTPUT$\nDate: $LONGDATETIME$" $CONTACTEMAIL$
|
||||
}
|
||||
|
||||
|
||||
|
17
roles/nagios_server/files/nagios/commands/nrpe.cfg
Normal file
17
roles/nagios_server/files/nagios/commands/nrpe.cfg
Normal file
|
@ -0,0 +1,17 @@
|
|||
# 'test nrpe'
|
||||
define command{
|
||||
command_name test_nrpe
|
||||
command_line $USER1$/check_nrpe -t 30 -H $HOSTADDRESS$
|
||||
|
||||
}
|
||||
# 'check by nrpe'
|
||||
define command{
|
||||
command_name check_by_nrpe
|
||||
command_line $USER1$/check_nrpe -t 30 -H $HOSTADDRESS$ -c $ARG1$
|
||||
}
|
||||
|
||||
# 'check-host-alive-nrpe' is better for hosts that are on vpn.
|
||||
define command{
|
||||
command_name check-host-alive-nrpe
|
||||
command_line $USER1$/check_nrpe -t 30 -H $HOSTADDRESS$
|
||||
}
|
26
roles/nagios_server/files/nagios/commands/perfdata.cfg
Normal file
26
roles/nagios_server/files/nagios/commands/perfdata.cfg
Normal file
|
@ -0,0 +1,26 @@
|
|||
################################################################################
|
||||
#
|
||||
# SAMPLE PERFORMANCE DATA COMMANDS
|
||||
#
|
||||
# These are sample performance data commands that can be used to send performance
|
||||
# data output to two text files (one for hosts, another for services). If you
|
||||
# plan on simply writing performance data out to a file, consider using the
|
||||
# host_perfdata_file and service_perfdata_file options in the main config file.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
|
||||
# 'process-host-perfdata' command definition
|
||||
define command{
|
||||
command_name process-host-perfdata
|
||||
command_line /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /var/log/nagios/host-perfdata.out
|
||||
}
|
||||
|
||||
|
||||
# 'process-service-perfdata' command definition
|
||||
define command{
|
||||
command_name process-service-perfdata
|
||||
command_line /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /var/log/nagios/service-perfdata.out
|
||||
}
|
||||
|
||||
|
31
roles/nagios_server/files/nagios/commands/ping.cfg
Normal file
31
roles/nagios_server/files/nagios/commands/ping.cfg
Normal file
|
@ -0,0 +1,31 @@
|
|||
# This command checks to see if a host is "alive" by pinging it
|
||||
# The check must result in a 100% packet loss or 5 second (5000ms) round trip
|
||||
# average time to produce a critical error.
|
||||
# Note: Five ICMP echo packets are sent (determined by the '-p 5' argument)
|
||||
|
||||
# 'check-host-alive' command definition
|
||||
define command{
|
||||
command_name check-host-alive
|
||||
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check-host-alive4
|
||||
command_line $USER1$/check_ping -4 -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 2
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check-host-alive6
|
||||
command_line $USER1$/check_ping -6 -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 2
|
||||
}
|
||||
|
||||
# 'check_ping4' and 'check_ping6' command definitions
|
||||
define command{
|
||||
command_name check_ping4
|
||||
command_line $USER1$/check_ping -4 -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
|
||||
}
|
||||
|
||||
define command{
|
||||
command_name check_ping6
|
||||
command_line $USER1$/check_ping -6 -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
|
||||
}
|
5
roles/nagios_server/files/nagios/commands/postgres.cfg
Normal file
5
roles/nagios_server/files/nagios/commands/postgres.cfg
Normal file
|
@ -0,0 +1,5 @@
|
|||
# 'pgsql'
|
||||
define command{
|
||||
command_name check_pgsql
|
||||
command_line $USER1$/check_pgsql -H $HOSTADDRESS$ -d $ARG1$ -p '{{nagios_db_user_password}}' --logname 'nagiosuser'
|
||||
}
|
28
roles/nagios_server/files/nagios/commands/rsyslog.cfg
Normal file
28
roles/nagios_server/files/nagios/commands/rsyslog.cfg
Normal file
|
@ -0,0 +1,28 @@
|
|||
################################################################################
|
||||
# COMMAND DEFINITIONS
|
||||
#
|
||||
# SYNTAX:
|
||||
#
|
||||
# define command{
|
||||
# template <templatename>
|
||||
# name <objectname>
|
||||
# command_name <commandname>
|
||||
# command_line <commandline>
|
||||
# }
|
||||
#
|
||||
# WHERE:
|
||||
#
|
||||
# <templatename> = object name of another command definition that should be
|
||||
# used as a template for this definition (optional)
|
||||
# <objectname> = object name of command definition, referenced by other
|
||||
# command definitions that use it as a template (optional)
|
||||
# <commandname> = name of the command, as recognized/used by Nagios
|
||||
# <commandline> = command line
|
||||
#
|
||||
################################################################################
|
||||
|
||||
|
||||
define command {
|
||||
command_name restart_rsyslog
|
||||
command_line $USER1$/restart_rsyslog $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$ $HOSTADDRESS$ "$HOSTALIAS$" "$SERVICEDESC$" "$SERVICESTATE$"
|
||||
}
|
12
roles/nagios_server/files/nagios/commands/smtp.cfg
Normal file
12
roles/nagios_server/files/nagios/commands/smtp.cfg
Normal file
|
@ -0,0 +1,12 @@
|
|||
# 'check_smtp' command definition
|
||||
define command{
|
||||
command_name check_smtp
|
||||
command_line $USER1$/check_smtp -H $HOSTADDRESS$
|
||||
}
|
||||
|
||||
|
||||
# 'check_email_delivery' command definition
|
||||
define command{
|
||||
command_name check_email_delivery
|
||||
command_line $USER1$/check_email_delivery_epn -H $ARG1$ --mailto $ARG2$ --mailfrom $ARG3$ --username $ARG4$ --password $ARG5$ -w $ARG6$ -c $ARG7$
|
||||
}
|
22
roles/nagios_server/files/nagios/commands/ssh.cfg
Normal file
22
roles/nagios_server/files/nagios/commands/ssh.cfg
Normal file
|
@ -0,0 +1,22 @@
|
|||
# 'check_ssh' command definition
|
||||
define command{
|
||||
command_name check_ssh
|
||||
command_line $USER1$/check_ssh -H $HOSTADDRESS$
|
||||
}
|
||||
|
||||
|
||||
define command {
|
||||
command_name check_by_ssh_check_raid
|
||||
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_raid.py"
|
||||
}
|
||||
|
||||
define command {
|
||||
command_name check_by_ssh_check_disk
|
||||
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_disk -w $ARG1$% -c $ARG2$% -p $ARG3$"
|
||||
}
|
||||
|
||||
# 'check_postgres_conns' command definition
|
||||
define command{
|
||||
command_name check_postgres_conns
|
||||
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_procs -u postgres -w $ARG1$ -c $ARG2$ -a $ARG3$"
|
||||
}
|
6
roles/nagios_server/files/nagios/commands/tcp.cfg
Normal file
6
roles/nagios_server/files/nagios/commands/tcp.cfg
Normal file
|
@ -0,0 +1,6 @@
|
|||
|
||||
# 'check_tcp' command definition
|
||||
define command{
|
||||
command_name check_tcp
|
||||
command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$
|
||||
}
|
5
roles/nagios_server/files/nagios/commands/testcloud.cfg
Normal file
5
roles/nagios_server/files/nagios/commands/testcloud.cfg
Normal file
|
@ -0,0 +1,5 @@
|
|||
# 'check_testcloud'
|
||||
define command{
|
||||
command_name check_testcloud
|
||||
command_line $USER1$/check_testcloud
|
||||
}
|
5
roles/nagios_server/files/nagios/commands/udp.cfg
Normal file
5
roles/nagios_server/files/nagios/commands/udp.cfg
Normal file
|
@ -0,0 +1,5 @@
|
|||
# 'check_udp' command definition
|
||||
define command{
|
||||
command_name check_udp
|
||||
command_line $USER1$/check_udp -H $HOSTADDRESS$ -p $ARG1$
|
||||
}
|
12
roles/nagios_server/files/nagios/commands/unbound.cfg
Normal file
12
roles/nagios_server/files/nagios/commands/unbound.cfg
Normal file
|
@ -0,0 +1,12 @@
|
|||
# 'check_unbound_80' command definition
|
||||
define command{
|
||||
command_name check_unbound_80
|
||||
command_line $USER1$/check_dig -H $HOSTADDRESS$ -w 5 -c 9 -p 80 -l $ARG1$ -A "+tcp"
|
||||
}
|
||||
|
||||
|
||||
# 'check_unbound_443' command definition
|
||||
define command{
|
||||
command_name check_unbound_443
|
||||
command_line $USER1$/check_dig_ssl -H $HOSTADDRESS$ -w 5 -c 9 -p 443 -L $ARG1$ -l $ARG2$ -A "+tcp"
|
||||
}
|
22
roles/nagios_server/files/nagios/configs/escalations.cfg
Normal file
22
roles/nagios_server/files/nagios/configs/escalations.cfg
Normal file
|
@ -0,0 +1,22 @@
|
|||
define hostescalation{
|
||||
host_name *
|
||||
hostgroup_name *
|
||||
contact_groups fedora-sysadmin-email,fedora-sysadmin-emergency,fedora-sysadmin-ircbot
|
||||
first_notification 2
|
||||
last_notification 0
|
||||
notification_interval 60
|
||||
escalation_period 24x7
|
||||
escalation_options d,u,r
|
||||
}
|
||||
|
||||
|
||||
define serviceescalation{
|
||||
host_name *
|
||||
service_description *
|
||||
contact_groups fedora-sysadmin-email,fedora-sysadmin-emergency,fedora-sysadmin-ircbot
|
||||
first_notification 2
|
||||
last_notification 0
|
||||
notification_interval 60
|
||||
escalation_period 24x7
|
||||
escalation_options w,u,c,r
|
||||
}
|
362
roles/nagios_server/files/nagios/configs/minimal.cfg
Normal file
362
roles/nagios_server/files/nagios/configs/minimal.cfg
Normal file
|
@ -0,0 +1,362 @@
|
|||
###############################################################################
|
||||
# MINIMAL.CFG
|
||||
#
|
||||
# MINIMALISTIC OBJECT CONFIG FILE (Template-Based Object File Format)
|
||||
#
|
||||
# Last Modified: 08-10-2005
|
||||
#
|
||||
#
|
||||
# NOTE: This config file is intended to be used to test a Nagios installation
|
||||
# that has been compiled with support for the template-based object
|
||||
# configuration files.
|
||||
#
|
||||
# This config file is intended to serve as an *extremely* simple
|
||||
# example of how you can create your object configuration file(s).
|
||||
# If you're interested in more complex object configuration files for
|
||||
# Nagios, look in the sample-config/template-object/ subdirectory of
|
||||
# the distribution.
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
#
|
||||
# TIME PERIODS
|
||||
#
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
|
||||
# This defines a timeperiod where all times are valid for checks,
|
||||
# notifications, etc. The classic "24x7" support nightmare. :-)
|
||||
|
||||
define timeperiod{
|
||||
timeperiod_name 24x7
|
||||
alias 24 Hours A Day, 7 Days A Week
|
||||
sunday 00:00-24:00
|
||||
monday 00:00-24:00
|
||||
tuesday 00:00-24:00
|
||||
wednesday 00:00-24:00
|
||||
thursday 00:00-24:00
|
||||
friday 00:00-24:00
|
||||
saturday 00:00-24:00
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
#
|
||||
# COMMANDS
|
||||
#
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
|
||||
# This is a sample service notification command that can be used to send email
|
||||
# notifications (about service alerts) to contacts.
|
||||
# 'notify-by-email' command definition
|
||||
define command{
|
||||
command_name notify-by-email
|
||||
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$OUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
|
||||
}
|
||||
|
||||
|
||||
# This is a sample host notification command that can be used to send email
|
||||
# notifications (about host alerts) to contacts.
|
||||
|
||||
define command{
|
||||
command_name host-notify-by-email
|
||||
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $OUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$
|
||||
}
|
||||
|
||||
|
||||
# Command to check to see if a host is "alive" (up) by pinging it
|
||||
|
||||
define command{
|
||||
command_name check-host-alive
|
||||
command_line $USER1$/check_ping -4 -H $HOSTADDRESS$ -w 300,99% -c 500,100% -p 2
|
||||
}
|
||||
|
||||
|
||||
# Generic command to check a device by pinging it
|
||||
|
||||
define command{
|
||||
command_name check_ping
|
||||
command_line $USER1$/check_ping -4 -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
|
||||
}
|
||||
|
||||
|
||||
# Command used to check disk space usage on local partitions
|
||||
|
||||
define command{
|
||||
command_name check_local_disk
|
||||
command_line $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
|
||||
}
|
||||
|
||||
|
||||
# Command used to check the number of currently logged in users on the
|
||||
# local machine
|
||||
|
||||
define command{
|
||||
command_name check_local_users
|
||||
command_line $USER1$/check_users -w $ARG1$ -c $ARG2$
|
||||
}
|
||||
|
||||
|
||||
# Command to check the number of running processes on the local machine
|
||||
|
||||
define command{
|
||||
command_name check_local_procs
|
||||
command_line $USER1$/check_procs -w $ARG1$ -c $ARG2$
|
||||
}
|
||||
|
||||
|
||||
# Command to check the load on the local machine
|
||||
|
||||
define command{
|
||||
command_name check_local_load
|
||||
command_line $USER1$/check_load -w $ARG1$ -c $ARG2$
|
||||
}
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
#
|
||||
# CONTACTS
|
||||
#
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
|
||||
# In this simple config file, a single contact will receive all alerts.
|
||||
# This assumes that you have an account (or email alias) called
|
||||
# "nagios-admin" on the local host.
|
||||
|
||||
define contact{
|
||||
contact_name nagios-admin
|
||||
alias Nagios Admin
|
||||
service_notification_period 24x7
|
||||
host_notification_period 24x7
|
||||
service_notification_options w,u,c,r
|
||||
host_notification_options d,r
|
||||
service_notification_commands notify-by-email
|
||||
host_notification_commands host-notify-by-email
|
||||
email admin@fedoraproject.org
|
||||
}
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
#
|
||||
# CONTACT GROUPS
|
||||
#
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
|
||||
# We only have one contact in this simple configuration file, so there is
|
||||
# no need to create more than one contact group.
|
||||
|
||||
define contactgroup{
|
||||
contactgroup_name admins
|
||||
alias Nagios Administrators
|
||||
members nagios-admin
|
||||
}
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
#
|
||||
# HOSTS
|
||||
#
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
|
||||
# Generic host definition template - This is NOT a real host, just a template!
|
||||
|
||||
define host{
|
||||
name generic-host ; The name of this host template
|
||||
notifications_enabled 1 ; Host notifications are enabled
|
||||
event_handler_enabled 1 ; Host event handler is enabled
|
||||
flap_detection_enabled 1 ; Flap detection is enabled
|
||||
failure_prediction_enabled 1 ; Failure prediction is enabled
|
||||
process_perf_data 1 ; Process performance data
|
||||
retain_status_information 1 ; Retain status information across program restarts
|
||||
retain_nonstatus_information 1 ; Retain non-status information across program restarts
|
||||
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
|
||||
}
|
||||
|
||||
|
||||
# Since this is a simple configuration file, we only monitor one host - the
|
||||
# local host (this machine).
|
||||
|
||||
define host{
|
||||
use generic-host ; Name of host template to use
|
||||
host_name localhost
|
||||
alias localhost
|
||||
address 127.0.0.1
|
||||
check_command check-host-alive
|
||||
max_check_attempts 10
|
||||
notification_interval 120
|
||||
notification_period 24x7
|
||||
notification_options d,r
|
||||
contact_groups admins
|
||||
}
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
#
|
||||
# HOST GROUPS
|
||||
#
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
|
||||
# We only have one host in our simple config file, so there is no need to
|
||||
# create more than one hostgroup.
|
||||
|
||||
define hostgroup{
|
||||
hostgroup_name test
|
||||
alias Test Servers
|
||||
members localhost
|
||||
}
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
#
|
||||
# SERVICES
|
||||
#
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
|
||||
# Generic service definition template - This is NOT a real service, just a template!
|
||||
|
||||
define service{
|
||||
name generic-service ; The 'name' of this service template
|
||||
active_checks_enabled 1 ; Active service checks are enabled
|
||||
passive_checks_enabled 1 ; Passive service checks are enabled/accepted
|
||||
parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems)
|
||||
obsess_over_service 1 ; We should obsess over this service (if necessary)
|
||||
check_freshness 0 ; Default is to NOT check service 'freshness'
|
||||
notifications_enabled 1 ; Service notifications are enabled
|
||||
event_handler_enabled 1 ; Service event handler is enabled
|
||||
flap_detection_enabled 1 ; Flap detection is enabled
|
||||
failure_prediction_enabled 1 ; Failure prediction is enabled
|
||||
process_perf_data 1 ; Process performance data
|
||||
retain_status_information 1 ; Retain status information across program restarts
|
||||
retain_nonstatus_information 1 ; Retain non-status information across program restarts
|
||||
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
|
||||
}
|
||||
|
||||
|
||||
# Define a service to "ping" the local machine
|
||||
|
||||
define service{
|
||||
use generic-service ; Name of service template to use
|
||||
host_name localhost
|
||||
service_description PING
|
||||
is_volatile 0
|
||||
check_period 24x7
|
||||
max_check_attempts 4
|
||||
normal_check_interval 5
|
||||
retry_check_interval 1
|
||||
contact_groups admins
|
||||
notification_options w,u,c,r
|
||||
notification_interval 960
|
||||
notification_period 24x7
|
||||
check_command check_ping!100.0,20%!500.0,60%
|
||||
}
|
||||
|
||||
|
||||
# Define a service to check the disk space of the root partition
|
||||
# on the local machine. Warning if < 20% free, critical if
|
||||
# < 10% free space on partition.
|
||||
|
||||
define service{
|
||||
use generic-service ; Name of service template to use
|
||||
host_name localhost
|
||||
service_description Root Partition
|
||||
is_volatile 0
|
||||
check_period 24x7
|
||||
max_check_attempts 4
|
||||
normal_check_interval 5
|
||||
retry_check_interval 1
|
||||
contact_groups admins
|
||||
notification_options w,u,c,r
|
||||
notification_interval 960
|
||||
notification_period 24x7
|
||||
check_command check_local_disk!20%!10%!/
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Define a service to check the number of currently logged in
|
||||
# users on the local machine. Warning if > 20 users, critical
|
||||
# if > 50 users.
|
||||
|
||||
define service{
|
||||
use generic-service ; Name of service template to use
|
||||
host_name localhost
|
||||
service_description Current Users
|
||||
is_volatile 0
|
||||
check_period 24x7
|
||||
max_check_attempts 4
|
||||
normal_check_interval 5
|
||||
retry_check_interval 1
|
||||
contact_groups admins
|
||||
notification_options w,u,c,r
|
||||
notification_interval 960
|
||||
notification_period 24x7
|
||||
check_command check_local_users!20!50
|
||||
}
|
||||
|
||||
|
||||
# Define a service to check the number of currently running procs
|
||||
# on the local machine. Warning if > 250 processes, critical if
|
||||
# > 400 processes.
|
||||
|
||||
define service{
|
||||
use generic-service ; Name of service template to use
|
||||
host_name localhost
|
||||
service_description Total Processes
|
||||
is_volatile 0
|
||||
check_period 24x7
|
||||
max_check_attempts 4
|
||||
normal_check_interval 5
|
||||
retry_check_interval 1
|
||||
contact_groups admins
|
||||
notification_options w,u,c,r
|
||||
notification_interval 960
|
||||
notification_period 24x7
|
||||
check_command check_local_procs!250!400
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Define a service to check the load on the local machine.
|
||||
|
||||
define service{
|
||||
use generic-service ; Name of service template to use
|
||||
host_name localhost
|
||||
service_description Current Load
|
||||
is_volatile 0
|
||||
check_period 24x7
|
||||
max_check_attempts 4
|
||||
normal_check_interval 5
|
||||
retry_check_interval 1
|
||||
contact_groups admins
|
||||
notification_options w,u,c,r
|
||||
notification_interval 960
|
||||
notification_period 24x7
|
||||
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
|
||||
}
|
||||
|
||||
|
||||
|
||||
# EOF
|
1349
roles/nagios_server/files/nagios/configs/nagios.cfg
Normal file
1349
roles/nagios_server/files/nagios/configs/nagios.cfg
Normal file
File diff suppressed because it is too large
Load diff
135
roles/nagios_server/files/nagios/configs/timeperiods.cfg
Normal file
135
roles/nagios_server/files/nagios/configs/timeperiods.cfg
Normal file
|
@ -0,0 +1,135 @@
|
|||
###############################################################################
|
||||
# TIMEPERIODS.CFG - SAMPLE TIMEPERIOD DEFINITIONS
|
||||
#
|
||||
#
|
||||
# NOTES: This config file provides you with some example timeperiod definitions
|
||||
# that you can reference in host, service, contact, and dependency
|
||||
# definitions.
|
||||
#
|
||||
# You don't need to keep timeperiods in a separate file from your other
|
||||
# object definitions. This has been done just to make things easier to
|
||||
# understand.
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
#
|
||||
# TIME PERIODS
|
||||
#
|
||||
###############################################################################
|
||||
###############################################################################
|
||||
|
||||
|
||||
define timeperiod{
|
||||
timeperiod_name 24x7
|
||||
alias 24 Hours A Day, 7 Days A Week
|
||||
sunday 00:00-24:00
|
||||
monday 00:00-24:00
|
||||
tuesday 00:00-24:00
|
||||
wednesday 00:00-24:00
|
||||
thursday 00:00-24:00
|
||||
friday 00:00-24:00
|
||||
saturday 00:00-24:00
|
||||
}
|
||||
|
||||
define timeperiod{
|
||||
timeperiod_name 16x7
|
||||
alias 15 Hours a day, 7 days a week
|
||||
sunday 00:00-04:00,13:00-24:00
|
||||
monday 00:00-04:00,13:00-24:00
|
||||
tuesday 00:00-04:00,13:00-24:00
|
||||
wednesday 00:00-04:00,13:00-24:00
|
||||
thursday 00:00-04:00,13:00-24:00
|
||||
friday 00:00-04:00,13:00-24:00
|
||||
saturday 00:00-04:00,13:00-24:00
|
||||
}
|
||||
|
||||
define timeperiod{
|
||||
timeperiod_name 16x7-AU
|
||||
alias 15 Hours a day, 7 days a week
|
||||
sunday 00:00-14:00,22:00-24:00
|
||||
monday 00:00-14:00,22:00-24:00
|
||||
tuesday 00:00-14:00,22:00-24:00
|
||||
wednesday 00:00-14:00,22:00-24:00
|
||||
thursday 00:00-14:00,22:00-24:00
|
||||
friday 00:00-14:00,22:00-24:00
|
||||
saturday 00:00-14:00,22:00-24:00
|
||||
}
|
||||
|
||||
|
||||
# Members of sysadmin-main already get nagios messages
|
||||
define timeperiod{
|
||||
timeperiod_name never
|
||||
alias Never
|
||||
}
|
||||
|
||||
# This defines a timeperiod where all times are valid for checks,
|
||||
# notifications, etc. The classic "24x7" support nightmare. :-)
|
||||
define timeperiod{
|
||||
timeperiod_name 24x7
|
||||
alias 24 Hours A Day, 7 Days A Week
|
||||
sunday 00:00-24:00
|
||||
monday 00:00-24:00
|
||||
tuesday 00:00-24:00
|
||||
wednesday 00:00-24:00
|
||||
thursday 00:00-24:00
|
||||
friday 00:00-24:00
|
||||
saturday 00:00-24:00
|
||||
}
|
||||
|
||||
|
||||
# 'workhours' timeperiod definition
|
||||
define timeperiod{
|
||||
timeperiod_name workhours
|
||||
alias Normal Work Hours
|
||||
monday 09:00-17:00
|
||||
tuesday 09:00-17:00
|
||||
wednesday 09:00-17:00
|
||||
thursday 09:00-17:00
|
||||
friday 09:00-17:00
|
||||
}
|
||||
|
||||
|
||||
# 'none' timeperiod definition
|
||||
define timeperiod{
|
||||
timeperiod_name none
|
||||
alias No Time Is A Good Time
|
||||
}
|
||||
|
||||
# Some U.S. holidays
|
||||
# Note: The timeranges for each holiday are meant to *exclude* the holidays from being
|
||||
# treated as a valid time for notifications, etc. You probably don't want your pager
|
||||
# going off on New Year's. Although your employer might... :-)
|
||||
define timeperiod{
|
||||
name us-holidays
|
||||
timeperiod_name us-holidays
|
||||
alias U.S. Holidays
|
||||
|
||||
january 1 00:00-00:00 ; New Years
|
||||
monday -1 may 00:00-00:00 ; Memorial Day (last Monday in May)
|
||||
july 4 00:00-00:00 ; Independence Day
|
||||
monday 1 september 00:00-00:00 ; Labor Day (first Monday in September)
|
||||
thursday 4 november 00:00-00:00 ; Thanksgiving (4th Thursday in November)
|
||||
december 25 00:00-00:00 ; Christmas
|
||||
}
|
||||
|
||||
|
||||
# This defines a modified "24x7" timeperiod that covers every day of the
|
||||
# year, except for U.S. holidays (defined in the timeperiod above).
|
||||
define timeperiod{
|
||||
timeperiod_name 24x7_sans_holidays
|
||||
alias 24x7 Sans Holidays
|
||||
|
||||
use us-holidays ; Get holiday exceptions from other timeperiod
|
||||
|
||||
sunday 00:00-24:00
|
||||
monday 00:00-24:00
|
||||
tuesday 00:00-24:00
|
||||
wednesday 00:00-24:00
|
||||
thursday 00:00-24:00
|
||||
friday 00:00-24:00
|
||||
saturday 00:00-24:00
|
||||
}
|
5
roles/nagios_server/files/nagios/contactgroups/bodhi.cfg
Normal file
5
roles/nagios_server/files/nagios/contactgroups/bodhi.cfg
Normal file
|
@ -0,0 +1,5 @@
|
|||
define contactgroup {
|
||||
contactgroup_name bodhi
|
||||
alias Bodhi Notifications
|
||||
members bowlofeggs
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
#define contactgroup{
|
||||
# contactgroup_name build-sysadmin-email
|
||||
# alias Build Sysadmin Email Contacts
|
||||
# members kevin,aditya
|
||||
# }
|
|
@ -0,0 +1,5 @@
|
|||
define contactgroup{
|
||||
contactgroup_name fedora-sysadmin-email
|
||||
alias Fedora Sysadmin Email Contacts
|
||||
members admin,kevin,puiterwijkp,smooge,ausil,jcollie,nb,rigeld2,codeblock,hvivani
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
define contactgroup{
|
||||
contactgroup_name fedora-sysadmin-ircbot
|
||||
alias Fedora Sysadmin irc Contacts
|
||||
members ircbot,fedmsg
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
define contactgroup{
|
||||
contactgroup_name fedora-sysadmin-pager
|
||||
alias Fedora Sysadmin Pager Contacts
|
||||
members smoogep,kevinp,puiterwijkp
|
||||
}
|
||||
define contactgroup{
|
||||
contactgroup_name fedora-sysadmin-emergency
|
||||
alias Fedora Sysadmin Pager Contacts
|
||||
members smooge-emergency,kevin-emergency,puiterwijk-emergency
|
||||
}
|
5
roles/nagios_server/files/nagios/contactgroups/null.cfg
Normal file
5
roles/nagios_server/files/nagios/contactgroups/null.cfg
Normal file
|
@ -0,0 +1,5 @@
|
|||
define contactgroup{
|
||||
contactgroup_name null
|
||||
alias null
|
||||
members null
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue