put in the first run at new nagios configs
This commit is contained in:
parent
a1957d29d4
commit
8cf72ff116
310 changed files with 13255 additions and 26 deletions
|
@ -1264,6 +1264,12 @@ docker-candidate-registry01.phx2.fedoraproject.org
|
||||||
docker-registry01.stg.phx2.fedoraproject.org
|
docker-registry01.stg.phx2.fedoraproject.org
|
||||||
docker-candidate-registry01.stg.phx2.fedoraproject.org
|
docker-candidate-registry01.stg.phx2.fedoraproject.org
|
||||||
|
|
||||||
|
[webservers:children]
|
||||||
|
proxies
|
||||||
|
ipsilon
|
||||||
|
ipa
|
||||||
|
fas
|
||||||
|
|
||||||
#
|
#
|
||||||
# Hosts in this group have zombie processes for various reasons
|
# Hosts in this group have zombie processes for various reasons
|
||||||
# and we want to not alert on those, so to the client nrpe.conf uses
|
# and we want to not alert on those, so to the client nrpe.conf uses
|
||||||
|
@ -1276,3 +1282,4 @@ pkgs02.phx2.fedoraproject.org
|
||||||
fed-cloud09.cloud.fedoraproject.org
|
fed-cloud09.cloud.fedoraproject.org
|
||||||
# Ansible from time to time in large runs has zombie threads
|
# Ansible from time to time in large runs has zombie threads
|
||||||
batcave01.phx2.fedoraproject.org
|
batcave01.phx2.fedoraproject.org
|
||||||
|
|
||||||
|
|
|
@ -35,4 +35,3 @@ define contact{
|
||||||
email 9178159801@vtext.com
|
email 9178159801@vtext.com
|
||||||
pager 9178159801@vtext.com
|
pager 9178159801@vtext.com
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,29 +10,29 @@ define contact{
|
||||||
email nick@bebout.net
|
email nick@bebout.net
|
||||||
}
|
}
|
||||||
|
|
||||||
define contact{
|
#define contact{
|
||||||
contact_name nb-emergency
|
# contact_name nb-emergency
|
||||||
alias Nick Bebout
|
# alias Nick Bebout
|
||||||
service_notification_period never
|
# service_notification_period never
|
||||||
host_notification_period never
|
# host_notification_period never
|
||||||
service_notification_options w,u,c,r
|
# service_notification_options w,u,c,r
|
||||||
host_notification_options d,u,r
|
# host_notification_options d,u,r
|
||||||
service_notification_commands notify-by-epager
|
# service_notification_commands notify-by-epager
|
||||||
host_notification_commands host-notify-by-epager
|
# host_notification_commands host-notify-by-epager
|
||||||
email nb5@txt.att.net
|
# email nb5@txt.att.net
|
||||||
pager nb5@txt.att.net
|
# pager nb5@txt.att.net
|
||||||
}
|
#}
|
||||||
|
|
||||||
define contact{
|
#define contact{
|
||||||
contact_name nbp
|
# contact_name nbp
|
||||||
alias Nick Bebout
|
# alias Nick Bebout
|
||||||
service_notification_period never
|
# service_notification_period never
|
||||||
host_notification_period never
|
# host_notification_period never
|
||||||
service_notification_options w,u,c,r
|
# service_notification_options w,u,c,r
|
||||||
host_notification_options d,u,r
|
# host_notification_options d,u,r
|
||||||
service_notification_commands notify-by-epager
|
# service_notification_commands notify-by-epager
|
||||||
host_notification_commands host-notify-by-epager
|
# host_notification_commands host-notify-by-epager
|
||||||
email nb5@txt.att.net
|
# email nb5@txt.att.net
|
||||||
pager nb5@txt.att.net
|
# pager nb5@txt.att.net
|
||||||
}
|
#}
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,19 @@
|
||||||
#}
|
#}
|
||||||
#
|
#
|
||||||
#define contact{
|
#define contact{
|
||||||
# contact_name skvidalp
|
# contact_name skvidal_xmpp
|
||||||
|
# alias Seth Vidal
|
||||||
|
# service_notification_period 24x7
|
||||||
|
# host_notification_period 24x7
|
||||||
|
# service_notification_options w,u,c,r
|
||||||
|
# host_notification_options d,u,r
|
||||||
|
# service_notification_commands notify-by-xmpp
|
||||||
|
# host_notification_commands host-notify-by-xmpp
|
||||||
|
# email skvidal@jabber.org
|
||||||
|
#}
|
||||||
|
#
|
||||||
|
#define contact{
|
||||||
|
# contact_name skvidal-emergency
|
||||||
# alias Seth Vidal
|
# alias Seth Vidal
|
||||||
# service_notification_period 24x7
|
# service_notification_period 24x7
|
||||||
# host_notification_period 24x7
|
# host_notification_period 24x7
|
||||||
|
@ -20,5 +32,17 @@
|
||||||
# service_notification_commands notify-by-epager
|
# service_notification_commands notify-by-epager
|
||||||
# host_notification_commands host-notify-by-epager
|
# host_notification_commands host-notify-by-epager
|
||||||
# email page-seth-vidal@sethdot.org
|
# email page-seth-vidal@sethdot.org
|
||||||
|
#}
|
||||||
|
#
|
||||||
|
#define contact{
|
||||||
|
# contact_name skvidalp
|
||||||
|
# alias Seth Vidal
|
||||||
|
# service_notification_period 16x7
|
||||||
|
# host_notification_period 16x7
|
||||||
|
# service_notification_options w,u,c,r
|
||||||
|
# host_notification_options d,u,r
|
||||||
|
# service_notification_commands notify-by-epager
|
||||||
|
# host_notification_commands host-notify-by-epager
|
||||||
|
# email page-seth-vidal@sethdot.org
|
||||||
# pager page-seth-vidal@sethdot.org
|
# pager page-seth-vidal@sethdot.org
|
||||||
#}
|
#}
|
||||||
|
|
36
roles/nagios_client/README.rst
Normal file
36
roles/nagios_client/README.rst
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
===================================
|
||||||
|
Nagios 4 Configuration for Fedora
|
||||||
|
===================================
|
||||||
|
|
||||||
|
The Fedora Infrastructure Nagios is built on a set of configurations
|
||||||
|
originally written for Nagios 2 and then upgraded over time to Nagios
|
||||||
|
3 and then 4.08. With additional changes made in the 4.2 series of
|
||||||
|
Nagios, this needed a better rewrite as various parts came from
|
||||||
|
pre-puppet and then various puppet modules added on top.
|
||||||
|
|
||||||
|
In order to get this rewrite done, we will use as much of the original
|
||||||
|
layout of the Fedora ansible nagios module as possible, but with rewrites to better
|
||||||
|
match current Nagios configurations so that it can be maintained.
|
||||||
|
|
||||||
|
Role directory layout
|
||||||
|
=====================
|
||||||
|
The original layout branched out from
|
||||||
|
|
||||||
|
roles/nagios/client/
|
||||||
|
roles/nagios/server/
|
||||||
|
|
||||||
|
Each had the usual trees below it. This breaks ansible best practices
|
||||||
|
and how most new modules are set up so the rewrite uses:
|
||||||
|
|
||||||
|
roles/nagios_client/
|
||||||
|
roles/nagios_server/
|
||||||
|
|
||||||
|
=====================
|
||||||
|
Nagios Client Files
|
||||||
|
=====================
|
||||||
|
|
||||||
|
For the most part the Nagios Client files seem to work from the
|
||||||
|
original layout to the new site. Changes will only need to be made to
|
||||||
|
playbooks for the initial changes.
|
||||||
|
|
||||||
|
|
72
roles/nagios_client/files/scripts/check_datanommer_timesince.py
Executable file
72
roles/nagios_client/files/scripts/check_datanommer_timesince.py
Executable file
|
@ -0,0 +1,72 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
""" NRPE check for datanommer/fedmsg health.
|
||||||
|
Given a category like 'bodhi', 'buildsys', or 'git', return an error if
|
||||||
|
datanommer hasn't seen a message of that type in such and such time.
|
||||||
|
You can alternatively provide a 'topic' which might look like
|
||||||
|
org.fedoraproject.prod.bodhi.update.comment.
|
||||||
|
|
||||||
|
Requires: python-dateutil
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
$ check_datanommer_timesince CATEGORY WARNING_THRESH CRITICAL_THRESH
|
||||||
|
|
||||||
|
:Author: Ralph Bean <rbean@redhat.com>
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import dateutil.relativedelta
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
def query_timesince(identifier):
    """ Return the seconds since datanommer last saw a matching message.

    ``identifier`` is treated as a fedmsg topic when it contains a '.',
    otherwise as a category.  ``datanommer-latest ... --timesince`` is run
    and the first element of the JSON list it prints is returned as a float.

    Raises ValueError when the command prints nothing parseable; the
    command's stderr is included in the message to aid debugging.
    """
    # If it has a '.', then assume it is a topic.
    if '.' in identifier:
        cmd = 'datanommer-latest --topic %s --timesince' % identifier
    else:
        cmd = 'datanommer-latest --category %s --timesince' % identifier
    sys.stderr.write("Running %r\n" % cmd)
    process = subprocess.Popen(cmd.split(), shell=False,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()

    # communicate() hands back bytes on Python 3; normalise to text.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8', 'replace')
    if not isinstance(stderr, str):
        stderr = stderr.decode('utf-8', 'replace')

    # The JSON payload follows a log prefix ending in "INFO] ".  Taking the
    # last piece of the split also copes with output that has no prefix.
    payload = stdout.split("INFO] ", 1)[-1].strip()
    try:
        data = json.loads(payload)
    except ValueError:
        raise ValueError("could not parse output of %r: %s" % (
            cmd, stderr.strip() or stdout.strip() or "no output"))
    return float(data[0])
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """ Compare datanommer's "time since last message" against thresholds.

    Expects the last three command-line arguments to be
    ``CATEGORY_OR_TOPIC WARNING_THRESH CRITICAL_THRESH`` with both thresholds
    given as integer seconds.  Prints one status line and exits with
    2 (critical), 1 (warning) or 0 (ok).

    Raises ValueError when fewer than three arguments are supplied or a
    threshold is not an integer.
    """
    args = sys.argv[1:]
    # Slicing sys.argv itself would silently swallow the script name as the
    # identifier when an argument is missing, so check the count explicitly.
    if len(args) < 3:
        raise ValueError(
            "usage: check_datanommer_timesince "
            "CATEGORY WARNING_THRESH CRITICAL_THRESH")
    identifier, warning_threshold, critical_threshold = args[-3:]
    warning_threshold = int(warning_threshold)
    critical_threshold = int(critical_threshold)

    timesince = query_timesince(identifier)

    # Break the elapsed seconds into human-friendly components.
    time_strings = []
    rd = dateutil.relativedelta.relativedelta(seconds=timesince)
    for denomination in ['years', 'months', 'days', 'hours', 'minutes', 'seconds']:
        value = getattr(rd, denomination, 0)
        if value:
            time_strings.append("%d %s" % (value, denomination))

    # Every component is zero for a message seen under a second ago.
    string = ", ".join(time_strings) or "0 seconds"
    reason = "datanommer has not seen a %r message in %s" % (identifier, string)

    if timesince > critical_threshold:
        print("%s %s" % ("CRIT: ", reason))
        sys.exit(2)

    if timesince > warning_threshold:
        print("%s %s" % ("WARN: ", reason))
        sys.exit(1)

    print("%s %s" % ("OK: ", reason))
    sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Any exception escaping main() is reported as Nagios UNKNOWN (exit 3).
    # sys.exit() calls made inside main() are not affected by this handler.
    try:
        main()
    except Exception as exc:
        print("%s %s" % ("UNKNOWN: ", str(exc)))
        sys.exit(3)
|
23
roles/nagios_client/files/scripts/check_fcomm_queue
Normal file
23
roles/nagios_client/files/scripts/check_fcomm_queue
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Report the length of the 'fedora-packages' retask queue:
# more than 500 tasks is CRITICAL, more than 250 is WARNING, otherwise OK.
# Any failure (including a missing retask module) is reported as UNKNOWN.
try:
    import retask.queue

    fcomm = retask.queue.Queue('fedora-packages')
    fcomm.connect()
    pending = fcomm.length

    if pending > 500:
        print("CRITICAL: %i tasks in fcomm queue" % pending)
        sys.exit(2)

    if pending > 250:
        print("WARNING: %i tasks in fcomm queue" % pending)
        sys.exit(1)

    print("OK: %i tasks in fcomm queue" % pending)
    sys.exit(0)

except Exception as err:
    print("%s %s" % ("UNKNOWN:", str(err)))
    sys.exit(3)
|
|
@ -0,0 +1,62 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
import zmq
|
||||||
|
|
||||||
|
# Check the backlog of one fedmsg consumer.
# Arguments: SERVICE CONSUMER WARNING_BACKLOG CRITICAL_BACKLOG
# One JSON status message is read from the service's monitoring socket and
# the named consumer's 'backlog' value is compared with the thresholds.
try:
    service, check_consumer = sys.argv[1], sys.argv[2]
    backlog_warning, backlog_critical = int(sys.argv[3]), int(sys.argv[4])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    # The socket file must exist and be writable before connecting.
    if not os.path.exists(fname):
        print("UNKNOWN - %s does not exist" % fname)
        sys.exit(3)
    if not os.access(fname, os.W_OK):
        print("UNKNOWN - cannot write to %s" % fname)
        sys.exit(3)

    # Subscribe to everything published on the socket.
    context = zmq.Context()
    subscriber = context.socket(zmq.SUB)
    subscriber.connect("ipc:///%s" % fname)
    subscriber.setsockopt(zmq.SUBSCRIBE, '')

    poller = zmq.Poller()
    poller.register(subscriber, zmq.POLLIN)

    # Wait up to `timeout` milliseconds for a single status message.
    timeout = 20000
    ready = dict(poller.poll(timeout))
    if ready.get(subscriber) != zmq.POLLIN:
        print('UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout)
        sys.exit(3)
    status = json.loads(subscriber.recv())

    for consumer in status['consumers']:
        if consumer['name'] != check_consumer:
            continue

        backlog = consumer['backlog']
        if backlog is None:
            print('ERROR: fedmsg consumer %s is not initialized' % consumer['name'])
            sys.exit(3)
        if backlog > backlog_critical:
            print('CRITICAL: fedmsg consumer %s backlog value is %i' % (consumer['name'], backlog))
            sys.exit(2)
        if backlog > backlog_warning:
            print('WARNING: fedmsg consumer %s backlog value is %i' % (consumer['name'], backlog))
            sys.exit(1)
        print('OK: fedmsg consumer %s backlog value is %i' % (consumer['name'], backlog))
        sys.exit(0)

    # No consumer in the message carried the requested name.
    print("UNKNOWN: fedmsg consumer %s not found" % check_consumer)
    sys.exit(3)
except Exception as err:
    print("%s %s" % ("UNKNOWN:", str(err)))
    sys.exit(3)
|
|
@ -0,0 +1,58 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
import zmq
|
||||||
|
|
||||||
|
# Check the exception counter of one fedmsg consumer.
# Arguments: SERVICE CONSUMER WARNING_COUNT CRITICAL_COUNT
# One JSON status message is read from the service's monitoring socket and
# the named consumer's 'exceptions' value is compared with the thresholds.
try:
    service, check_consumer = sys.argv[1], sys.argv[2]
    exceptions_warning, exceptions_critical = int(sys.argv[3]), int(sys.argv[4])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    # The socket file must exist and be writable before connecting.
    if not os.path.exists(fname):
        print("UNKNOWN - %s does not exist" % fname)
        sys.exit(3)
    if not os.access(fname, os.W_OK):
        print("UNKNOWN - cannot write to %s" % fname)
        sys.exit(3)

    # Subscribe to everything published on the socket.
    context = zmq.Context()
    subscriber = context.socket(zmq.SUB)
    subscriber.connect("ipc:///%s" % fname)
    subscriber.setsockopt(zmq.SUBSCRIBE, '')

    poller = zmq.Poller()
    poller.register(subscriber, zmq.POLLIN)

    # Wait up to `timeout` milliseconds for a single status message.
    timeout = 20000
    ready = dict(poller.poll(timeout))
    if ready.get(subscriber) != zmq.POLLIN:
        print('UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout)
        sys.exit(3)
    status = json.loads(subscriber.recv())

    for consumer in status['consumers']:
        if consumer['name'] != check_consumer:
            continue

        if consumer['exceptions'] > exceptions_critical:
            print('CRITICAL: fedmsg consumer %s exceptions value is %i' % (consumer['name'], consumer['exceptions']))
            sys.exit(2)
        if consumer['exceptions'] > exceptions_warning:
            print('WARNING: fedmsg consumer %s exceptions value is %i' % (consumer['name'], consumer['exceptions']))
            sys.exit(1)
        print('OK: fedmsg consumer %s exceptions value is %i' % (consumer['name'], consumer['exceptions']))
        sys.exit(0)

    # No consumer in the message carried the requested name.
    print("UNKNOWN: fedmsg consumers %s not found" % check_consumer)
    sys.exit(3)
except Exception as err:
    print("%s %s" % ("UNKNOWN:", str(err)))
    sys.exit(3)
|
|
@ -0,0 +1,69 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import arrow
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import zmq
|
||||||
|
|
||||||
|
# Check how long ago one fedmsg producer last ran.
# Arguments: SERVICE PRODUCER WARNING_SECONDS CRITICAL_SECONDS
# One JSON status message is read from the service's monitoring socket and
# the age of the named producer's 'last_ran' timestamp is compared with the
# thresholds.
try:
    service, check_producer = sys.argv[1], sys.argv[2]
    elapsed_warning, elapsed_critical = int(sys.argv[3]), int(sys.argv[4])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    # The socket file must exist and be writable before connecting.
    if not os.path.exists(fname):
        print("UNKNOWN - %s does not exist" % fname)
        sys.exit(3)
    if not os.access(fname, os.W_OK):
        print("UNKNOWN - cannot write to %s" % fname)
        sys.exit(3)

    # Subscribe to everything published on the socket.
    context = zmq.Context()
    subscriber = context.socket(zmq.SUB)
    subscriber.connect("ipc:///%s" % fname)
    subscriber.setsockopt(zmq.SUBSCRIBE, '')

    poller = zmq.Poller()
    poller.register(subscriber, zmq.POLLIN)

    # Wait up to `timeout` milliseconds for a single status message.
    timeout = 20000
    ready = dict(poller.poll(timeout))
    if ready.get(subscriber) != zmq.POLLIN:
        print('UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout)
        sys.exit(3)
    status = json.loads(subscriber.recv())

    now = time.time()

    for prod in status['producers']:
        if prod['name'] != check_producer:
            continue

        diff = now - prod['last_ran']
        then = arrow.get(prod['last_ran']).humanize()
        details = (check_producer, then, diff)

        if diff > elapsed_critical:
            print("CRITICAL: %s last ran %s (%i seconds ago)" % details)
            sys.exit(2)
        if diff > elapsed_warning:
            print("WARNING: %s last ran %s (%i seconds ago)" % details)
            sys.exit(1)
        print("OK: %s last ran %s (%i seconds ago)" % details)
        sys.exit(0)

    # No producer in the message carried the requested name.
    print("UNKNOWN: fedmsg producer %s not found" % check_producer)
    sys.exit(3)
except Exception as err:
    print("%s %s" % ("UNKNOWN:", str(err)))
    sys.exit(3)
|
|
@ -0,0 +1,64 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
import zmq
|
||||||
|
|
||||||
|
# Check that a set of fedmsg consumers/producers exists and is initialized.
# Arguments: SERVICE NAME [NAME ...]
# One JSON status message is read from the service's monitoring socket.
try:
    service = sys.argv[1]
    check_list = frozenset(sys.argv[2:])
    fname = '/var/run/fedmsg/monitoring-%s.socket' % service

    if not check_list:
        print("UNKNOWN - empty list of fedmsg consumers and producers to check")
        sys.exit(3)

    # The socket file must exist and be writable before connecting.
    if not os.path.exists(fname):
        print("UNKNOWN - %s does not exist" % fname)
        sys.exit(3)
    if not os.access(fname, os.W_OK):
        print("UNKNOWN - cannot write to %s" % fname)
        sys.exit(3)

    # Subscribe to everything published on the socket.
    context = zmq.Context()
    subscriber = context.socket(zmq.SUB)
    subscriber.connect("ipc:///%s" % fname)
    subscriber.setsockopt(zmq.SUBSCRIBE, '')

    poller = zmq.Poller()
    poller.register(subscriber, zmq.POLLIN)

    # Wait up to `timeout` milliseconds for a single status message.
    timeout = 20000
    ready = dict(poller.poll(timeout))
    if ready.get(subscriber) != zmq.POLLIN:
        print('UNKNOWN - ZMQ timeout. No message received in %i ms' % timeout)
        sys.exit(3)
    status = json.loads(subscriber.recv())

    # Any requested consumer that is present must be initialized ...
    for consumer in status['consumers']:
        if consumer['name'] in check_list and not consumer['initialized']:
            print('ERROR: fedmsg consumer %s is not initialized' % consumer['name'])
            sys.exit(2)

    # ... and likewise any requested producer.
    for producer in status['producers']:
        if producer['name'] in check_list and not producer['initialized']:
            print('ERROR: fedmsg producer %s is not initialized' % producer['name'])
            sys.exit(2)

    # Every requested name has to appear among the reported plugins.
    # check_list is known to be non-empty here, so building the list once up
    # front is equivalent to rebuilding it for each item.
    known_names = [p['name'] for p in status['producers'] + status['consumers']]
    for item in check_list:
        if item not in known_names:
            print('ERROR: %s not found among installed plugins' % item)
            sys.exit(2)

    print("OK: fedmsg consumer(s) and producer(s) initialized")
    sys.exit(0)

except Exception as err:
    print("%s %s" % ("UNKNOWN:", str(err)))
    sys.exit(3)
|
76
roles/nagios_client/files/scripts/check_haproxy_conns.py
Executable file
76
roles/nagios_client/files/scripts/check_haproxy_conns.py
Executable file
|
@ -0,0 +1,76 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
""" Nagios check for haproxy over-subscription.
|
||||||
|
|
||||||
|
fedmsg-gateway is the primary concern as it can eat up a ton of simultaneous
|
||||||
|
connections.
|
||||||
|
|
||||||
|
:Author: Ralph Bean <rbean@redhat.com>
|
||||||
|
"""
|
||||||
|
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def _numeric(value):
    """ Coerce ``value`` to an int, failing that a float, else return it as-is.

    Only ValueError is treated as "not a number"; any other error raised by
    the conversion propagates to the caller.
    """
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            pass
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def query(sockname="/var/run/haproxy-stat"):
    """ Read stats from the haproxy socket and return a dict.

    Sends ``show info`` to the unix socket at ``sockname``, reads the reply
    until the peer closes the connection, and parses each ``key: value``
    line.  Values are passed through ``_numeric``.

    Raises socket errors on connection problems and ValueError when the
    reply contains no ``key: value`` lines at all.
    """
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        # Don't hang the check forever on an unresponsive socket.
        s.settimeout(10)
        s.connect(sockname)
        s.sendall(b'show info\n')

        # A single recv() may return only part of the reply; read to EOF.
        chunks = []
        while True:
            chunk = s.recv(4096)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        s.close()

    response = b''.join(chunks)
    if not isinstance(response, str):
        response = response.decode('utf-8', 'replace')

    data = {}
    for line in response.splitlines():
        # Split on the first ':' only -- values may contain colons too.
        key, sep, value = line.partition(':')
        if not sep:
            continue
        data[key.strip()] = _numeric(value.strip())

    if not data:
        raise ValueError("empty 'show info' response from %s" % sockname)
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def nagios_check(data):
    """ Print warnings and return nagios exit codes.

    ``data`` is the dict produced by ``query``; its 'CurrConns' and 'Maxconn'
    entries give the subscription percentage.  Returns 0 below 50%, 1 below
    75%, 2 up to and including 100%, and 3 (UNKNOWN) above 100% or when the
    percentage cannot be computed (no data, missing keys, non-numeric values
    or a Maxconn of zero).
    """
    try:
        current = data['CurrConns']
        maxconn = data['Maxconn']
        percent = 100 * float(current) / float(maxconn)
    except (TypeError, KeyError, ValueError, ZeroDivisionError) as e:
        # Report unusable input as UNKNOWN instead of dying with a traceback.
        print("HAPROXY SUBS UNKNOWN: " + "unusable haproxy stats: %r" % (e,))
        return 3

    details = "%.2f%% subscribed.  %i current of %i maxconn." % (
        percent, current, maxconn,
    )

    if percent < 50:
        print("HAPROXY SUBS OK: " + details)
        return 0

    if percent < 75:
        print("HAPROXY SUBS WARN: " + details)
        return 1

    if percent <= 100:
        print("HAPROXY SUBS CRIT: " + details)
        return 2

    print("HAPROXY SUBS UNKNOWN: " + details)
    return 3
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # A failure while talking to haproxy is reported as UNKNOWN (exit 3);
    # otherwise the exit code is whatever nagios_check() returns.
    try:
        stats = query(sockname="/var/run/haproxy-stat")
    except Exception as err:
        print("HAPROXY SUBS UNKNOWN: " + str(err))
        sys.exit(3)
    else:
        sys.exit(nagios_check(stats))
|
59
roles/nagios_client/files/scripts/check_haproxy_mirrorlist.py
Executable file
59
roles/nagios_client/files/scripts/check_haproxy_mirrorlist.py
Executable file
|
@ -0,0 +1,59 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
# Check how many servers of the "mirror-lists" haproxy proxy are DOWN.
# Sends 'show stat' to the haproxy stats socket, parses the CSV reply and
# reports OK when none are down, WARN when fewer than half are down and
# CRIT otherwise.  Any failure to obtain usable stats is UNKNOWN.
try:
    unixsocket = "/var/run/haproxy-stat"
    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        s.connect(unixsocket)
        s.sendall(b'show stat\n')

        # A single recv() may return only part of the reply; read to EOF.
        chunks = []
        while True:
            chunk = s.recv(16384)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        s.close()

    raw = b''.join(chunks)
    if not isinstance(raw, str):
        raw = raw.decode('utf-8', 'replace')

    # First line is the CSV header, prefixed with "# ".
    output = raw.strip().split('\n')
    fields = output.pop(0).split(',')
    fields[0] = fields[0].replace('# ', '')
    proxies = [dict(zip(fields, line.split(',')))
               for line in output if line.strip()]

    total = 0
    downcount = 0
    downlist = ""
    for proxy in proxies:
        # Skip the aggregate rows; only individual servers count.
        if proxy['svname'] == "FRONTEND" or proxy['svname'] == "BACKEND":
            continue
        if proxy['pxname'] == "mirror-lists":
            total += 1
            if proxy['status'] == "DOWN":
                downlist += proxy["svname"] + " "
                downcount += 1

    # Without any server there is nothing to compute a percentage from.
    if total == 0:
        raise ValueError("no mirror-lists servers found in haproxy stats")

    unavailability = 100 * float(downcount) / float(total)

except Exception as e:
    print("MIRRORLIST STATE UNKNOWN: " + str(e))
    sys.exit(3)

if unavailability == 0:
    print("MIRRORLIST STATE OK: " + downlist)
    sys.exit(0)

if unavailability < 50:
    print("MIRRORLIST STATE WARN: " + downlist)
    sys.exit(1)

print("MIRRORLIST STATE CRIT: " + downlist)
sys.exit(2)
|
74
roles/nagios_client/files/scripts/check_ipa_replication
Normal file
74
roles/nagios_client/files/scripts/check_ipa_replication
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
# Source: https://github.com/opinkerfi/nagios-plugins/blob/master/check_ipa/check_ipa_replication
|
||||||
|
# Copyright 2013, Tomas Edwardsson
|
||||||
|
# Copyright 2016, Patrick Uiterwijk
|
||||||
|
#
|
||||||
|
# This script is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This script is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import ldap
|
||||||
|
from pynag.Plugins import PluginHelper, critical, warning, ok
|
||||||
|
|
||||||
|
plugin = PluginHelper()

# Command-line options: LDAP URI (required) and optional bind credentials.
plugin.parser.add_option('-u', help="ldap uri", dest="uri")
plugin.parser.add_option('-D', help="bind DN", dest="binddn")
plugin.parser.add_option('-w', help="bind password", dest="bindpw")
plugin.parse_arguments()

if not plugin.options.uri:
    plugin.parser.error('-u (uri) argument is required')

# Fetch every replication agreement below cn=config.
try:
    conn = ldap.initialize(plugin.options.uri)

    if plugin.options.binddn:
        conn.bind_s(plugin.options.binddn, plugin.options.bindpw)

    replication = conn.search_s(
        'cn=config',
        ldap.SCOPE_SUBTREE,
        '(objectclass=nsds5replicationagreement)',
        ['nsDS5ReplicaHost', 'nsds5replicaLastUpdateStatus'])
except Exception as e:
    plugin.status(critical)
    plugin.add_summary("Unable to initialize ldap connection: %s" % (e))
    plugin.exit()


# Loop through replication agreements
for rhost in replication:
    attrs = rhost[1]
    status = attrs['nsds5replicaLastUpdateStatus'][0]
    plugin.add_summary("Replica %s Status: %s" % (attrs['nsDS5ReplicaHost'][0], status))

    if status.startswith('Error ('):
        # IPA >=4.4.0: the code sits between the parentheses
        code = status[status.find('(')+1:status.find(')')]
    else:
        # IPA <4.4.0: the code is everything before the first space
        code = status[:status.find(' ')]

    # Code 1 (busy replica) is not an error; it is treated as "ok" for now.
    if code in ('0', '1'):
        plugin.status(ok)
    else:
        plugin.status(critical)

if not len(replication):
    plugin.add_summary("Warning: No replicas found")
    plugin.status(warning)

plugin.exit()
|
||||||
|
|
17
roles/nagios_client/files/scripts/check_lock
Normal file
17
roles/nagios_client/files/scripts/check_lock
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
import fcntl
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Verify that /mnt/koji/.nagios_test can be opened for reading and writing
# and that an exclusive flock() can be taken on it.
try:
    f = open('/mnt/koji/.nagios_test', 'r')
    f.close()
    f = open('/mnt/koji/.nagios_test', 'w')
except IOError:
    print("Could not create file")
    sys.exit(2)

# A failing flock() must be reported as CRITICAL rather than escaping as a
# traceback; the file is closed either way.
try:
    fcntl.flock(f, fcntl.LOCK_EX)
except IOError as e:
    print("Could not lock file: %s" % e)
    sys.exit(2)
finally:
    f.close()

print("File Locked Successfully")
sys.exit(0)
|
123
roles/nagios_client/files/scripts/check_lock_file_age
Executable file
123
roles/nagios_client/files/scripts/check_lock_file_age
Executable file
|
@ -0,0 +1,123 @@
|
||||||
|
#! /usr/bin/perl -w
|
||||||
|
|
||||||
|
# check_lock_file_age.pl Copyright (C) 2010 Ricky Elrod <codeblock@fedoraproject.org>
|
||||||
|
#
|
||||||
|
# Fork of check_file_age.pl
|
||||||
|
#
|
||||||
|
# Checks a lock file's size and modification time to make sure it's not empty
|
||||||
|
# and that it's sufficiently recent.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||||
|
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# you should have received a copy of the GNU General Public License
|
||||||
|
# along with this program (or with Nagios); if not, write to the
|
||||||
|
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
# Boston, MA 02111-1307, USA
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use English;
|
||||||
|
use Getopt::Long;
|
||||||
|
use File::stat;
|
||||||
|
use vars qw($PROGNAME);
|
||||||
|
use lib "/usr/lib64/nagios/plugins";
|
||||||
|
use utils qw (%ERRORS &print_revision &support);
|
||||||
|
|
||||||
|
sub print_help ();
|
||||||
|
sub print_usage ();
|
||||||
|
|
||||||
|
my ($opt_c, $opt_f, $opt_w, $opt_h, $opt_V);
my ($result, $age, $st);

$PROGNAME="check_lock_file_age";

# Default thresholds, in minutes (converted to seconds further down).
$opt_w = 1;
$opt_c = 5;
$opt_f = "";

Getopt::Long::Configure('bundling');
GetOptions(
	"V"   => \$opt_V, "version"        => \$opt_V,
	"h"   => \$opt_h, "help"           => \$opt_h,
	# --file needs "=s" like -f does; without it the option is a flag and
	# $opt_f would become 1 instead of the path.
	"f=s" => \$opt_f, "file=s"         => \$opt_f,
	"w=f" => \$opt_w, "warning-age=f"  => \$opt_w,
	"c=f" => \$opt_c, "critical-age=f" => \$opt_c);

if ($opt_V) {
	print_revision($PROGNAME, '1.4.14');
	exit $ERRORS{'OK'};
}

if ($opt_h) {
	print_help();
	exit $ERRORS{'OK'};
}

if (($opt_c and $opt_w) and ($opt_c < $opt_w)) {
	print "Warning time must be less than Critical time.\n";
	exit $ERRORS{'UNKNOWN'};
}

# Fall back to the first positional argument when no -f/--file was given.
$opt_f = shift unless ($opt_f);

if (! $opt_f) {
	print "LOCK_FILE_AGE UNKNOWN: No file specified\n";
	exit $ERRORS{'UNKNOWN'};
}

# Check that file exists (can be directory or link)
unless (-e $opt_f) {
	print "LOCK_FILE_AGE OK: File not found (Lock file removed) - $opt_f\n";
	exit $ERRORS{'OK'};
}

$st = File::stat::stat($opt_f);
unless (defined $st) {
	# stat() failed although the file existed a moment ago: either the lock
	# was removed in between (fine) or it cannot be examined (unknown).
	my $stat_error = "$!";
	unless (-e $opt_f) {
		print "LOCK_FILE_AGE OK: File not found (Lock file removed) - $opt_f\n";
		exit $ERRORS{'OK'};
	}
	print "LOCK_FILE_AGE UNKNOWN: Cannot stat $opt_f: $stat_error\n";
	exit $ERRORS{'UNKNOWN'};
}
$age = time - $st->mtime;

$result = 'OK';

# Convert minutes to seconds
if($opt_c) { $opt_c *= 60; }
if($opt_w) { $opt_w *= 60; }

if ($opt_c and $age > $opt_c) {
	$result = 'CRITICAL';
}
elsif ($opt_w and $age > $opt_w) {
	$result = 'WARNING';
}

# If the age is higher than 2 minutes, convert seconds -> minutes
# If it's higher than a day, use days.
# Just a nicety, to make people not have to do math ;)
if($age > 86400) { $age = int(($age/86400))." days"; }
elsif($age > 120) { $age = int(($age/60))." minutes"; }
else { $age = "$age seconds"; }

print "LOCK_FILE_AGE $result: $opt_f is $age old.\n";
exit $ERRORS{$result};
|
||||||
|
|
||||||
|
# Print the command-line synopsis.  The -w/-c thresholds are given in
# minutes (the script multiplies them by 60 before comparing), so the
# placeholders say <mins>, matching the text printed by print_help().
sub print_usage () {
	print "Usage:\n";
	print " $PROGNAME [-w <mins>] [-c <mins>] -f <file>\n";
	print " $PROGNAME [-h | --help]\n";
	print " $PROGNAME [-V | --version]\n";
}
|
||||||
|
|
||||||
|
# Print the full help text: revision banner, copyright, usage synopsis,
# a description of the threshold arguments, and the support notice.
sub print_help () {
	print_revision($PROGNAME, '1.4.14');
	print "Copyright (c) 2010 Ricky Elrod\n\n";
	print_usage();
	print "\n",
	      " <mins> File must be no more than this many minutes old (default: warn 1m, crit 5m)\n",
	      "\n";
	support();
}
|
24
roles/nagios_client/files/scripts/check_memcache_connect
Normal file
24
roles/nagios_client/files/scripts/check_memcache_connect
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
#!/bin/bash
#
# 2014-11-19
# Author: Ralph Bean <rbean@redhat.com>
#
# Nagios check: can we talk to the local memcached at all?

# Nagios plugin exit codes
ok=0
warn=1
crit=2
unkn=3

# For now the only thing verified is that the "stats" command can be sent
# to memcached without the pipeline failing. Parsing the stats output for
# finer-grained information is left for the future.
echo stats | nc 127.0.0.1 11211 > /dev/null
status=$?

# Success path first; anything else is reported as critical.
if [ $status -eq 0 ]; then
    echo "OK: stats command got status code $status"
    exit $ok
fi

echo "CRIT: stats command got status code $status"
exit $crit
|
14
roles/nagios_client/files/scripts/check_osbs_api.py
Executable file
14
roles/nagios_client/files/scripts/check_osbs_api.py
Executable file
|
@ -0,0 +1,14 @@
|
||||||
|
#!/usr/bin/python
"""Nagios check: verify that the local OSBS API endpoint returns path data.

Exit codes: 0 = OK, 2 = CRITICAL.
"""

import requests
import sys

# Seconds to wait for the endpoint before giving up; without a timeout a
# wedged API would make the check itself hang.
REQUEST_TIMEOUT = 30

try:
    # verify=False: the certificate of the localhost endpoint is not validated.
    r = requests.get("https://localhost:8443/", verify=False,
                     timeout=REQUEST_TIMEOUT)
    has_paths = 'paths' in r.json().keys()
except Exception as e:
    # Connection errors, timeouts and non-JSON replies all mean the API is
    # not usable. Report CRITICAL instead of dying with a traceback, which
    # would exit with status 1 and be read by Nagios as a WARNING.
    print("CRITICAL: OSBS API not responding properly (%s)" % e)
    sys.exit(2)

if has_paths:
    print("OK: OSBS API endpoint is responding with path data")
    sys.exit(0)
else:
    print("CRITICAL: OSBS API not responding properly")
    sys.exit(2)
|
||||||
|
|
23
roles/nagios_client/files/scripts/check_osbs_builds.py
Executable file
23
roles/nagios_client/files/scripts/check_osbs_builds.py
Executable file
|
@ -0,0 +1,23 @@
|
||||||
|
#!/usr/bin/python
"""Nagios check: verify that `osbs list-builds` answers with a build table.

The table header is looked for on stderr, optionally preceded by a
"not attached to terminal" notice.
Exit codes: 0 = OK, 2 = CRITICAL.
"""

import subprocess
import sys

try:
    sp = subprocess.Popen(
        ["osbs", "list-builds"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        stdin=subprocess.PIPE
    )
    sp_out, sp_err = sp.communicate()
except OSError as e:
    # The osbs binary is missing or not executable: report it as CRITICAL
    # rather than crashing with a traceback (exit status 1 = WARNING).
    print("CRITICAL: OSBS UNRESPONSIVE (cannot run osbs: %s)" % e)
    sys.exit(2)

sp_err = sp_err.split('\n')

# Drop the notice osbs prints when it is not run from a terminal.
if 'not attached to terminal' in sp_err[0]:
    sp_err = sp_err[1:]

# Guard against empty output: an empty list or a blank first line must
# yield CRITICAL, not an IndexError.
first_fields = sp_err[0].split() if sp_err else []

if first_fields and first_fields[0] == 'BUILD':
    print("OK: OSBS is responsive to 'osbs list-builds'")
    sys.exit(0)
else:
    print("CRITICAL: OSBS UNRESPONSIVE")
    sys.exit(2)
|
49
roles/nagios_client/files/scripts/check_postfix_queue
Normal file
49
roles/nagios_client/files/scripts/check_postfix_queue
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
#!/bin/bash
#
# 19-07-2010
# Author: Cherwin Nooitmeer <cherwin@gmail.com>
#
# Nagios check for the number of mails in the postfix queue.
# Usage: check_postfix_queue -w <warning> -c <critical>
#

# exit codes
e_ok=0
e_warning=1
e_critical=2
e_unknown=3

# regular expression that matches queue IDs (e.g. D71EF7AC80F8)
queue_id='^[A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9]'

usage="Invalid command line usage"

if [ -z "$1" ]; then
    echo "$usage"
    exit $e_unknown
fi

while getopts ":w:c:" options
do
    case $options in
        w ) warning=$OPTARG ;;
        c ) critical=$OPTARG ;;
        * ) echo "$usage"
            exit $e_unknown ;;
    esac
done

# Both thresholds are mandatory and must be whole numbers. Without this
# guard the numeric comparisons below fail, retval stays empty and the
# script exits 0 (OK) no matter how large the queue is.
for threshold in "$warning" "$critical"; do
    case "$threshold" in
        ''|*[!0-9]*)
            echo "$usage"
            exit $e_unknown ;;
    esac
done

# determine queue size
# The pattern is quoted so the shell cannot glob-expand its bracket
# expressions against files in the current directory.
qsize=$(mailq | grep -E -c "$queue_id")
if [ -z "$qsize" ]
then
    echo "UNKNOWN: could not determine mail queue size"
    exit $e_unknown
fi

if [ "$qsize" -ge "$critical" ]; then
    retval=$e_critical
elif [ "$qsize" -ge "$warning" ]; then
    retval=$e_warning
else
    retval=$e_ok
fi

echo "$qsize mail(s) in queue | mail_queue=$qsize"
exit $retval
|
26
roles/nagios_client/files/scripts/check_rabbitmq_size
Normal file
26
roles/nagios_client/files/scripts/check_rabbitmq_size
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
#!/bin/python
"""Nagios check for the depth and consumer count of a RabbitMQ queue.

Usage: check_rabbitmq_size <queue> <critical> <warning>

The queue is looked up in the default vhost ("/", URL-encoded as %2f) via
the management API on localhost:15672.

Exit codes: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.

NOTE(review): the first threshold is the CRITICAL one and the second the
WARNING one. check_basset.cfg.j2 calls this check with "10 20", which
makes the warning level unreachable -- confirm the intended order.
"""
import sys
import requests

if len(sys.argv) < 4:
    print('UNKNOWN: usage: %s <queue> <critical> <warning>' % sys.argv[0])
    sys.exit(3)

# Command-line arguments are strings. Comparing them directly with the
# integer message count never triggers on Python 2 (and raises TypeError
# on Python 3), so convert them up front.
try:
    critical = int(sys.argv[2])
    warning = int(sys.argv[3])
except ValueError:
    print('UNKNOWN: thresholds must be integers')
    sys.exit(3)

url = 'http://localhost:15672/api/queues/%%2f/%s' % (sys.argv[1])

try:
    r = requests.get(url, auth=('guest', 'guest')).json()
    consumers = r['consumers']
    messages = r['messages']
except Exception as e:
    # An unreachable API or an unexpected reply must not surface as a
    # traceback (exit status 1 would be read as WARNING).
    print('UNKNOWN: could not query %s: %s' % (url, e))
    sys.exit(3)

msg = 'Messages in queue: %i (%i consumers)' % (messages, consumers)

if consumers < 1:
    print('CRITICAL: No consumers: %s' % msg)
    sys.exit(2)

if messages > critical:
    print('CRITICAL: %s' % msg)
    sys.exit(2)

if messages > warning:
    print('WARNING: %s' % msg)
    sys.exit(1)

print('OK: %s' % msg)
sys.exit(0)
|
45
roles/nagios_client/files/scripts/check_raid.py
Normal file
45
roles/nagios_client/files/scripts/check_raid.py
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
#!/usr/bin/env python
#
# very simple python script to parse out /proc/mdstat
# and give results for nagios to monitor
#

import re
import sys

# The per-member state map md prints for an array, e.g. [UU] or [U_].
# An underscore marks a missing/failed member.
STATUS_RE = re.compile(r'\[[U_]+\]')


def check_mdstat(mdstat):
    """Scan the lines of /proc/mdstat for degraded arrays.

    mdstat -- list of lines (without trailing newlines).

    Returns (devices, error): the list of md device names found and a
    comma-separated description of every degraded array ("" when all
    arrays are healthy).
    """
    devices = []
    problems = []

    for i, line in enumerate(mdstat):
        if line[0:2] != 'md':
            continue

        device = line.split()[0]
        devices.append(device)

        # The state map lives on the line after the device line. Its
        # position within that line varies (e.g. "blocks [2/2] [UU]" vs.
        # "blocks super 1.2 [2/2] [UU]"), so search for it instead of
        # relying on a fixed field index.
        detail = mdstat[i + 1] if i + 1 < len(mdstat) else ''
        match = STATUS_RE.search(detail)
        if match is None or '_' not in match.group(0):
            continue
        status = match.group(0)

        # see if we can figure out what's going on
        err = mdstat[i + 2].split() if i + 2 < len(mdstat) else []
        msg = "device=%s status=%s" % (device, status)
        if len(err) > 0:
            msg = msg + " rebuild=%s" % err[0]
        problems.append(msg)

    return devices, ", ".join(problems)


def main():
    """Read /proc/mdstat and exit with the matching nagios status."""
    try:
        with open('/proc/mdstat') as f:
            mdstat = f.read().split('\n')
    except IOError:
        # seems we have no software raid on this machine
        sys.exit(0)

    devices, error = check_mdstat(mdstat)

    if not error:
        print("DEVICES %s OK" % " ".join(devices))
        sys.exit(0)
    else:
        print(error)
        sys.exit(2)


if __name__ == '__main__':
    main()
|
||||||
|
|
84
roles/nagios_client/files/scripts/check_readonly_fs
Executable file
84
roles/nagios_client/files/scripts/check_readonly_fs
Executable file
|
@ -0,0 +1,84 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# check_readonlyfs: Check for readonly filesystems
|
||||||
|
# Copyright (C) 2010 Davide Madrisan <davide.madrisan@gmail.com>
|
||||||
|
|
||||||
|
PROGNAME=`/bin/basename $0`
|
||||||
|
PROGPATH=`echo $0 | sed -e 's,[\\/][^\\/][^\\/]*$,,'`
|
||||||
|
REVISION=`echo '$Revision: 1 $' | sed -e 's/[^0-9.]//g'`
|
||||||
|
|
||||||
|
. $PROGPATH/utils.sh
|
||||||
|
|
||||||
|
# Print one "Usage:" line per supported invocation.
print_usage() {
    local flag
    for flag in --no-network-fs --help --version; do
        printf 'Usage: %s %s\n' "$PROGNAME" "$flag"
    done
}
|
||||||
|
|
||||||
|
# Print the full help text: revision banner, usage lines, a one-line
# description of the plugin and the support footer.
print_help() {
    print_revision $PROGNAME $REVISION
    echo ""
    print_usage
    printf '\n%s\n\n' "readonly filesystem checker plugin for Nagios"
    support
}
|
||||||
|
|
||||||
|
# NETFS=1: network filesystems are included in the scan unless
# --no-network-fs/-n is given (which sets it to "0").
NETFS=1

# Grab the command line arguments

# Exit status used when at least one read-only filesystem is found.
# Nothing below reassigns it, so such a finding is always reported with
# this status (presumably WARNING as defined by the sourced utils.sh).
exitstatus=$STATE_WARNING #default

while test -n "$1"; do
    case "$1" in
        --help|-h)
            print_help
            exit $STATE_OK
            ;;
        --version|-V)
            print_revision $PROGNAME $REVISION
            exit $STATE_OK
            ;;
        --no-network-fs|-n)
            NETFS="0"
            ;;
        *)
            echo "Unknown argument: $1"
            print_usage
            exit $STATE_UNKNOWN
            ;;
    esac
    shift
done

[ -r /proc/mounts ] || { echo "cannot read /proc/mounts!"; exit $STATE_UNKNOWN; }

# nerr counts mount points found read-only; rofs_list collects them.
nerr=0
# Saved so IFS can be restored after splitting the mount options on commas.
IFS_SAVE="$IFS"

rofs_list=""
# Each /proc/mounts line is split into: device, mount point, filesystem
# type, mount options; the remaining fields land in "ignore".
while read dev mp fs mopt ignore; do
    [ "$dev" = none ] && continue
    # Filesystem types that are never checked.
    case $fs in binfmt_misc|devpts|iso9660|proc|selinuxfs|rpc_pipefs|sysfs|tmpfs|usbfs)
        continue ;;
    esac
    case $fs in autofs|nfs|nfs4|smbfs)
        # skip the network filesystems
        [ "$NETFS" = 0 ] && continue ;;
    esac

    # Load the comma-separated mount options into the positional
    # parameters, then walk them until the first empty one.
    IFS=","; set -- $mopt; IFS="$IFS_SAVE"
    while :; do
        case "$1" in
            ro) rofs_list="$rofs_list $mp"; nerr=$(( $nerr + 1 )) ;;
            "") shift; break ;;
        esac
        shift
    done
# Process substitution (not a pipe) keeps the loop in the current shell,
# so nerr and rofs_list are still set after it finishes.
done < <(LC_ALL=C /bin/cat /proc/mounts 2>/dev/null)

# No read-only mounts: print OK and exit here. Otherwise print the list
# and fall through to the exit below.
[ $nerr -eq 0 ] && { echo OK; exit $STATE_OK; } || echo "$rofs_list: read only fs"

exit $exitstatus
|
108
roles/nagios_client/files/scripts/check_supybot_plugin
Executable file
108
roles/nagios_client/files/scripts/check_supybot_plugin
Executable file
|
@ -0,0 +1,108 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
""" check_supybot_plugin -- ensure that a plugin is loaded by supybot.
|
||||||
|
|
||||||
|
Run like:
|
||||||
|
|
||||||
|
check_supybot_plugin --target fedmsg
|
||||||
|
check_supybot_plugin --target koji --debug
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
import socket
|
||||||
|
import string
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
def process_args():
    """Parse the command line and return the argparse namespace.

    Options: -t/--target (plugin to look for), -n/--nick (IRC nick),
    -d/--debug (verbose output), -H/--host and -p/--port (IRC server,
    defaulting to irc.freenode.net:6667).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-t', '--target', default=None, dest='target',
        help="Required. The plugin we're looking for."
    )
    parser.add_argument(
        '-n', '--nick', default=None, dest='nick',
        help="NICK to use when connecting to freenode.",
    )
    parser.add_argument(
        '-d', '--debug', default=False, action='store_true',
        help='Print out debug information.', dest='debug',
    )
    parser.add_argument(
        '-H', '--host', default='irc.freenode.net',
        help='Host to connect to.', dest='host',
    )
    parser.add_argument(
        '-p', '--port', default=6667, type=int,
        help='Port to connect to.', dest='port',
    )
    return parser.parse_args()
|
||||||
|
|
||||||
|
args = process_args()
|
||||||
|
|
||||||
|
# Use a random nick so people can't mess with us
|
||||||
|
if not args.nick:
|
||||||
|
args.nick = 'nrpe-' + str(uuid.uuid4()).split('-')[0]
|
||||||
|
|
||||||
|
name = "NRPE Bot"
|
||||||
|
readbuffer = ""
|
||||||
|
|
||||||
|
if not args.target:
|
||||||
|
print "UNKNOWN: No 'target' specified."
|
||||||
|
sys.exit(3)
|
||||||
|
|
||||||
|
args.target = args.target.lower()
|
||||||
|
|
||||||
|
if args.debug:
|
||||||
|
print "connecting to %s/%i" % (args.host, args.port)
|
||||||
|
|
||||||
|
try:
|
||||||
|
s = socket.socket()
|
||||||
|
s.connect((args.host, args.port))
|
||||||
|
|
||||||
|
if args.debug:
|
||||||
|
print "as %s/%s (%s)" % (args.nick, args.nick, name)
|
||||||
|
|
||||||
|
s.send("nick %s\r\n" % args.nick)
|
||||||
|
s.send("USER %s %s bla :%s\r\n" % (args.nick, args.host, name))
|
||||||
|
|
||||||
|
while 1:
|
||||||
|
readbuffer = readbuffer+s.recv(1024)
|
||||||
|
temp = string.split(readbuffer, "\n")
|
||||||
|
readbuffer = temp.pop()
|
||||||
|
|
||||||
|
for line in temp:
|
||||||
|
line = string.rstrip(line)
|
||||||
|
|
||||||
|
if args.debug:
|
||||||
|
print " * ", line
|
||||||
|
|
||||||
|
line = string.split(line)
|
||||||
|
|
||||||
|
if line[1] == 'MODE':
|
||||||
|
msg = "privmsg zodbot :list\r\n"
|
||||||
|
if args.debug:
|
||||||
|
print "sending:"
|
||||||
|
print " ->", msg
|
||||||
|
s.send(msg)
|
||||||
|
|
||||||
|
if line[1] == 'PRIVMSG':
|
||||||
|
if args.debug:
|
||||||
|
print "Got our response.."
|
||||||
|
|
||||||
|
plugins = map(str.lower, ' '.join(line[3:][1:]).split(', '))
|
||||||
|
|
||||||
|
if args.target in plugins:
|
||||||
|
print "OK"
|
||||||
|
s.send("QUIT")
|
||||||
|
sys.exit(0)
|
||||||
|
else:
|
||||||
|
print "CRITICAL: %r not loaded by supybot" % args.target
|
||||||
|
s.send("QUIT")
|
||||||
|
sys.exit(2)
|
||||||
|
except Exception as e:
|
||||||
|
print "UNKNOWN: ", str(e)
|
||||||
|
if args.debug:
|
||||||
|
raise
|
||||||
|
sys.exit(3)
|
19
roles/nagios_client/files/scripts/check_testcloud
Normal file
19
roles/nagios_client/files/scripts/check_testcloud
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
#!/bin/bash
# Nagios check: alert on the number of running testcloud instances.

# Thresholds: more than WARNING running VMs warns, more than CRITICAL is critical.
CRITICAL=20
WARNING=15

# Count the lines of the instance listing that mention "running"
# (case-insensitive).
RUNNING_VMS=$(testcloud instance list | grep -i 'running' | wc -l)

# Healthy case first.
if [ $RUNNING_VMS -le $WARNING ]
then
    echo "Testcloud: OK Number of VMs running: $RUNNING_VMS"
    exit 0
fi

if [ $RUNNING_VMS -gt $CRITICAL ]
then
    echo "Testcloud: CRITICAL Number of VMs running: $RUNNING_VMS"
    exit 2
fi

echo "Testcloud: WARNING Number of VMs running: $RUNNING_VMS"
exit 1
|
BIN
roles/nagios_client/files/selinux/fi-nrpe.mod
Normal file
BIN
roles/nagios_client/files/selinux/fi-nrpe.mod
Normal file
Binary file not shown.
BIN
roles/nagios_client/files/selinux/fi-nrpe.pp
Normal file
BIN
roles/nagios_client/files/selinux/fi-nrpe.pp
Normal file
Binary file not shown.
11
roles/nagios_client/files/selinux/fi-nrpe.te
Normal file
11
roles/nagios_client/files/selinux/fi-nrpe.te
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
# Local SELinux policy module "fi-nrpe", version 1.0.
module fi-nrpe 1.0;

# Types and permissions this module refers to; they must already be
# defined by the loaded policy.
require {
	type nagios_system_plugin_t;
	type nrpe_exec_t;
	class file getattr;
}

#============= nagios_system_plugin_t ==============
# Allow processes in the nagios_system_plugin_t domain to getattr (stat)
# files labelled nrpe_exec_t.
allow nagios_system_plugin_t nrpe_exec_t:file getattr;
|
||||||
|
|
3
roles/nagios_client/handlers/main.yml
Normal file
3
roles/nagios_client/handlers/main.yml
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
---
# Handler notified by the tasks that change nrpe configuration.
- name: restart nrpe
  service:
    name: nrpe
    state: restarted
|
228
roles/nagios_client/tasks/main.yml
Normal file
228
roles/nagios_client/tasks/main.yml
Normal file
|
@ -0,0 +1,228 @@
|
||||||
|
# nagios-client/nrpe
|
||||||
|
|
||||||
|
---
|
||||||
|
# install pkgs:
|
||||||
|
- name: install nagios client pkgs
|
||||||
|
yum: name={{ item }} state=present
|
||||||
|
with_items:
|
||||||
|
- nrpe
|
||||||
|
- nagios-plugins
|
||||||
|
- nagios-plugins-disk
|
||||||
|
- nagios-plugins-file_age
|
||||||
|
- nagios-plugins-users
|
||||||
|
- nagios-plugins-procs
|
||||||
|
- nagios-plugins-swap
|
||||||
|
- nagios-plugins-load
|
||||||
|
- nagios-plugins-ping
|
||||||
|
tags:
|
||||||
|
- packages
|
||||||
|
- nagios_client
|
||||||
|
when: ansible_distribution_major_version|int < 22
|
||||||
|
|
||||||
|
# install pkgs:
|
||||||
|
- name: install nagios client pkgs
|
||||||
|
dnf: name={{ item }} state=present
|
||||||
|
with_items:
|
||||||
|
- nrpe
|
||||||
|
- nagios-plugins
|
||||||
|
- nagios-plugins-disk
|
||||||
|
- nagios-plugins-file_age
|
||||||
|
- nagios-plugins-users
|
||||||
|
- nagios-plugins-procs
|
||||||
|
- nagios-plugins-swap
|
||||||
|
- nagios-plugins-load
|
||||||
|
- nagios-plugins-ping
|
||||||
|
tags:
|
||||||
|
- packages
|
||||||
|
- nagios_client
|
||||||
|
when: ansible_distribution_major_version|int > 21
|
||||||
|
|
||||||
|
- name: install local nrpe check scripts that are not packaged
|
||||||
|
copy: src="scripts/{{ item }}" dest="{{ libdir }}/nagios/plugins/{{ item }}" mode=0755 owner=nagios group=nagios
|
||||||
|
with_items:
|
||||||
|
- check_haproxy_conns.py
|
||||||
|
- check_haproxy_mirrorlist.py
|
||||||
|
- check_postfix_queue
|
||||||
|
- check_raid.py
|
||||||
|
- check_lock
|
||||||
|
- check_fcomm_queue
|
||||||
|
- check_fedmsg_consumer_backlog.py
|
||||||
|
- check_fedmsg_consumer_exceptions.py
|
||||||
|
- check_fedmsg_producer_last_ran.py
|
||||||
|
- check_fedmsg_producers_consumers.py
|
||||||
|
- check_supybot_plugin
|
||||||
|
- check_rabbitmq_size
|
||||||
|
- check_datanommer_timesince.py
|
||||||
|
- check_memcache_connect
|
||||||
|
- check_readonly_fs
|
||||||
|
- check_lock_file_age
|
||||||
|
- check_testcloud
|
||||||
|
- check_osbs_builds.py
|
||||||
|
- check_osbs_api.py
|
||||||
|
- check_ipa_replication
|
||||||
|
when: not inventory_hostname.startswith('noc')
|
||||||
|
tags:
|
||||||
|
- nagios_client
|
||||||
|
|
||||||
|
# create dirs
|
||||||
|
# puppet used to make /var/spool/nagios (owned by nagios.nagios) mode 750
|
||||||
|
# and /usr/lib/nagios/plugins (owned by root) mode 755 - but we don't know WHY
|
||||||
|
# then stuff it with plugins from the plugins dir in the nagios module
|
||||||
|
# then we symlinked that to /usr/lib64/nagios/plugins
|
||||||
|
# it was a nightmare - don't do that - my ghost will haunt you if you do
|
||||||
|
# skvidal 2013-05-21
|
||||||
|
|
||||||
|
|
||||||
|
# Three tasks for handling our custom selinux module
|
||||||
|
- name: ensure a directory exists for our custom selinux module
|
||||||
|
file: dest=/usr/share/nrpe state=directory
|
||||||
|
|
||||||
|
- name: copy over our custom selinux module
|
||||||
|
copy: src=selinux/fi-nrpe.pp dest=/usr/share/nrpe/fi-nrpe.pp
|
||||||
|
register: selinux_module
|
||||||
|
|
||||||
|
# Load the module only on major version 7 hosts, and only when the copy
# task above actually changed the .pp file.
# The registered result's "changed" attribute is read directly; the
# "|changed" filter form is deprecated in newer Ansible releases.
- name: install our custom selinux module
  command: semodule -i /usr/share/nrpe/fi-nrpe.pp
  when: ansible_distribution_major_version|int == 7 and selinux_module.changed
|
||||||
|
|
||||||
|
|
||||||
|
# Set up our base config.
|
||||||
|
- name: /etc/nagios/nrpe.cfg
|
||||||
|
template: src=nrpe.cfg.j2 dest=/etc/nagios/nrpe.cfg
|
||||||
|
when: not inventory_hostname.startswith('noc')
|
||||||
|
notify:
|
||||||
|
- restart nrpe
|
||||||
|
tags:
|
||||||
|
- config
|
||||||
|
- nagios_client
|
||||||
|
|
||||||
|
#
|
||||||
|
# The actual items files here end in .j2 (they are templates)
|
||||||
|
# So when adding or modifying them change the .j2 version in git.
|
||||||
|
#
|
||||||
|
- name: install nrpe client configs
|
||||||
|
template: src={{ item }}.j2 dest=/etc/nrpe.d/{{ item }}
|
||||||
|
with_items:
|
||||||
|
- check_mirrorlist_cache.cfg
|
||||||
|
- check_raid.cfg
|
||||||
|
- check_ipa.cfg
|
||||||
|
- check_readonly_fs.cfg
|
||||||
|
- check_cron.cfg
|
||||||
|
- check_disk.cfg
|
||||||
|
- check_swap.cfg
|
||||||
|
- check_postfix_queue.cfg
|
||||||
|
- check_lock.cfg
|
||||||
|
- check_fedmsg_hub_proc.cfg
|
||||||
|
- check_fedmsg_irc_proc.cfg
|
||||||
|
- check_fedmsg_relay_proc.cfg
|
||||||
|
- check_fedmsg_gateway_proc.cfg
|
||||||
|
- check_fedmsg_masher_proc.cfg
|
||||||
|
- check_redis_proc.cfg
|
||||||
|
- check_autocloud_proc.cfg
|
||||||
|
- check_fedmsg_consumers.cfg
|
||||||
|
- check_supybot_fedmsg_plugin.cfg
|
||||||
|
- check_datanommer_history.cfg
|
||||||
|
- check_memcache.cfg
|
||||||
|
- check_lock_file_age.cfg
|
||||||
|
- check_basset.cfg
|
||||||
|
- check_fmn.cfg
|
||||||
|
- check_osbs.cfg
|
||||||
|
- check_koschei_polling_proc.cfg
|
||||||
|
- check_koschei_resolver_proc.cfg
|
||||||
|
- check_koschei_scheduler_proc.cfg
|
||||||
|
- check_koschei_watcher_proc.cfg
|
||||||
|
- check_testcloud.cfg
|
||||||
|
notify:
|
||||||
|
- restart nrpe
|
||||||
|
tags:
|
||||||
|
- config
|
||||||
|
- nagios_client
|
||||||
|
|
||||||
|
#
|
||||||
|
# The actual items files here end in .j2 (they are templates)
|
||||||
|
# So when adding or modifying them change the .j2 version in git.
|
||||||
|
#
|
||||||
|
- name: install nrpe bugyou fedmsg hubs check config
|
||||||
|
template: src=check_fedmsg_hub_procs_bugyou.cfg.j2 dest=/etc/nrpe.d/check_fedmsg_hub_procs_bugyou.cfg
|
||||||
|
when: inventory_hostname.startswith('bugyou01')
|
||||||
|
notify:
|
||||||
|
- restart nrpe
|
||||||
|
tags:
|
||||||
|
- nagios_client
|
||||||
|
|
||||||
|
#
|
||||||
|
# The actual items files here end in .j2 (they are templates)
|
||||||
|
# So when adding or modifying them change the .j2 version in git.
|
||||||
|
#
|
||||||
|
- name: install nrpe openvpn check config
|
||||||
|
template: src=check_openvpn_link.cfg.j2 dest=/etc/nrpe.d/check_openvpn_link.cfg
|
||||||
|
when: datacenter != 'phx2'
|
||||||
|
notify:
|
||||||
|
- restart nrpe
|
||||||
|
tags:
|
||||||
|
- nagios_client
|
||||||
|
#
|
||||||
|
# The actual items files here end in .j2 (they are templates)
|
||||||
|
# So when adding or modifying them change the .j2 version in git.
|
||||||
|
#
|
||||||
|
- name: install nrpe unbound check config
|
||||||
|
template: src=check_unbound_proc.cfg.j2 dest=/etc/nrpe.d/check_unbound_proc.cfg
|
||||||
|
when: inventory_hostname.startswith('unbound')
|
||||||
|
notify:
|
||||||
|
- restart nrpe
|
||||||
|
tags:
|
||||||
|
- nagios_client
|
||||||
|
#
|
||||||
|
# The actual items files here end in .j2 (they are templates)
|
||||||
|
# So when adding or modifying them change the .j2 version in git.
|
||||||
|
#
|
||||||
|
- name: install nrpe merged log check script on log01
|
||||||
|
template: src=check_merged_file_age.cfg.j2 dest=/etc/nrpe.d/check_merged_file_age.cfg
|
||||||
|
when: inventory_hostname.startswith('log0')
|
||||||
|
notify:
|
||||||
|
- restart nrpe
|
||||||
|
tags:
|
||||||
|
- nagios_client
|
||||||
|
#
|
||||||
|
# The actual items files here end in .j2 (they are templates)
|
||||||
|
# So when adding or modifying them change the .j2 version in git.
|
||||||
|
#
|
||||||
|
- name: install nrpe check_mysql config for mariadb servers
|
||||||
|
template: src=check_mysql.cfg.j2 dest=/etc/nrpe.d/check_mysql.cfg
|
||||||
|
when: inventory_hostname.startswith('db03')
|
||||||
|
notify:
|
||||||
|
- restart nrpe
|
||||||
|
tags:
|
||||||
|
- nagios_client
|
||||||
|
|
||||||
|
# nrpe check configs that are only installed on hosts whose inventory
# name starts with "proxy".
# NOTE(review): two items are spelled "happroxy", while the check scripts
# installed by this role are named check_haproxy_*.py -- confirm that the
# template file names really carry the double "p" before renaming anything.
- name: install nrpe checks for proxies
  template: src={{ item }}.j2 dest=/etc/nrpe.d/{{ item }}
  with_items:
  - check_happroxy_conns.cfg
  - check_happroxy_mirrorlist.cfg
  - check_varnish_proc.cfg
  when: inventory_hostname.startswith('proxy')
  notify:
  - restart nrpe
  tags:
  - nagios_client
|
||||||
|
|
||||||
|
# Make sure nrpe is running now and enabled at boot.
# "started" is the documented state name; "running" is a deprecated alias.
- name: nrpe service start
  service: name=nrpe state=started enabled=true
  tags:
  - service
  - nagios_client
|
||||||
|
|
||||||
|
# Probe for the fedmsg group: stdout is the number of matching getent
# lines with spaces stripped. The task runs even in check mode and is
# never reported as a change.
- name: Check if the fedmsg group exists
  shell: /usr/bin/getent group fedmsg | /usr/bin/wc -l | tr -d ' '
  register: fedmsg_exists
  check_mode: no
  changed_when: false
  tags:
  - nagios_client
|
||||||
|
|
||||||
|
- name: Add nrpe user to the fedmsg group if it exists
|
||||||
|
user: name=nrpe groups=fedmsg append=yes
|
||||||
|
when: fedmsg_exists.stdout == "1"
|
||||||
|
tags:
|
||||||
|
- nagios_client
|
|
@ -0,0 +1 @@
|
||||||
|
# Exactly one root-owned python process running autocloud_job.py must exist.
# The plugin path uses the libdir variable, like the other nrpe templates in this role.
command[check_autocloud_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'python' -a 'autocloud_job.py' -u root
|
4
roles/nagios_client/templates/check_basset.cfg.j2
Normal file
4
roles/nagios_client/templates/check_basset.cfg.j2
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
# Process checks for the basset stack: one mongod, one RabbitMQ beam.smp
# and between one and six basset-worker processes.
command[check_mongo_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u mongodb -C mongod -c 1:1
command[check_rabbitmq_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u rabbitmq -C beam.smp -c 1:1
command[check_worker_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u basset-worker -C basset-worker -c 1:6
# Queue depth of the check_submission queue.
# NOTE(review): check_rabbitmq_size reads its first threshold as CRITICAL
# and its second as WARNING, so "10 20" leaves the warning level
# unreachable -- confirm the intended order.
command[check_basset_queue]={{ libdir }}/nagios/plugins/check_rabbitmq_size check_submission 10 20
|
1
roles/nagios_client/templates/check_cron.cfg.j2
Normal file
1
roles/nagios_client/templates/check_cron.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# Critical unless the number of root-owned crond processes is within 1..15.
command[check_cron]={{ libdir }}/nagios/plugins/check_procs -c 1:15 -C 'crond' -u root
|
|
@ -0,0 +1,50 @@
|
||||||
|
# Checks on the datanommer history to make sure we're still receiving messages
|
||||||
|
# of all types.
|
||||||
|
#
|
||||||
|
# The following are fedmsg/datanommer checks to be run on busgateway01.
|
||||||
|
# They check for the time since the latest message in any particular category.
|
||||||
|
# The first number is the seconds elapsed until we should raise a warning.
|
||||||
|
# The second number is the seconds elapsed until we should raise an error.
|
||||||
|
# For your reference:
|
||||||
|
# 4 hours -> 14400
|
||||||
|
# 1 day -> 86400
|
||||||
|
# 3 days -> 259200
|
||||||
|
# 1 week -> 604800
|
||||||
|
# 3 weeks -> 1814400
|
||||||
|
# 1 month -> 2628000
|
||||||
|
# 3 months -> 7884000
|
||||||
|
command[check_datanommer_buildsys]={{libdir}}/nagios/plugins/check_datanommer_timesince.py buildsys 14400 86400
|
||||||
|
command[check_datanommer_git]={{libdir}}/nagios/plugins/check_datanommer_timesince.py git 86400 604800
|
||||||
|
command[check_datanommer_bodhi]={{libdir}}/nagios/plugins/check_datanommer_timesince.py bodhi 86400 604800
|
||||||
|
command[check_datanommer_wiki]={{libdir}}/nagios/plugins/check_datanommer_timesince.py wiki 259200 1814400
|
||||||
|
command[check_datanommer_compose]={{libdir}}/nagios/plugins/check_datanommer_timesince.py compose 259200 1814400
|
||||||
|
command[check_datanommer_meetbot]={{libdir}}/nagios/plugins/check_datanommer_timesince.py meetbot 604800 2628000
|
||||||
|
command[check_datanommer_fas]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fas 1814400 2628000
|
||||||
|
command[check_datanommer_pkgdb]={{libdir}}/nagios/plugins/check_datanommer_timesince.py pkgdb 1814400 2628000
|
||||||
|
command[check_datanommer_fedoratagger]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedoratagger 2628000 7884000
|
||||||
|
command[check_datanommer_planet]={{libdir}}/nagios/plugins/check_datanommer_timesince.py planet 2628000 7884000
|
||||||
|
command[check_datanommer_copr]={{libdir}}/nagios/plugins/check_datanommer_timesince.py copr 21600 86400
|
||||||
|
command[check_datanommer_trac]={{libdir}}/nagios/plugins/check_datanommer_timesince.py trac 86400 259200
|
||||||
|
command[check_datanommer_askbot]={{libdir}}/nagios/plugins/check_datanommer_timesince.py askbot 86400 259200
|
||||||
|
command[check_datanommer_fedbadges]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedbadges 86400 259200
|
||||||
|
command[check_datanommer_fedocal]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedocal 7884000 23652000
|
||||||
|
command[check_datanommer_ansible]={{libdir}}/nagios/plugins/check_datanommer_timesince.py ansible 432000 604800
|
||||||
|
command[check_datanommer_summershum]={{libdir}}/nagios/plugins/check_datanommer_timesince.py summershum 604800 1814400
|
||||||
|
command[check_datanommer_jenkins]={{libdir}}/nagios/plugins/check_datanommer_timesince.py jenkins 432000 604800
|
||||||
|
command[check_datanommer_github]={{libdir}}/nagios/plugins/check_datanommer_timesince.py github 432000 604800
|
||||||
|
command[check_datanommer_kerneltest]={{libdir}}/nagios/plugins/check_datanommer_timesince.py kerneltest 604800 1814400
|
||||||
|
command[check_datanommer_fmn]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fmn 604800 1814400
|
||||||
|
command[check_datanommer_anitya]={{libdir}}/nagios/plugins/check_datanommer_timesince.py anitya 604800 1814400
|
||||||
|
command[check_datanommer_fedimg]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedimg 259200 604800
|
||||||
|
command[check_datanommer_hotness]={{libdir}}/nagios/plugins/check_datanommer_timesince.py hotness 604800 1814400
|
||||||
|
command[check_datanommer_faf]={{libdir}}/nagios/plugins/check_datanommer_timesince.py faf 86400 259200
|
||||||
|
command[check_datanommer_koschei]={{libdir}}/nagios/plugins/check_datanommer_timesince.py koschei 86400 604800
|
||||||
|
command[check_datanommer_autocloud]={{libdir}}/nagios/plugins/check_datanommer_timesince.py autocloud 259200 1814400
|
||||||
|
command[check_datanommer_twoweekatomic]={{libdir}}/nagios/plugins/check_datanommer_timesince.py org.fedoraproject.prod.releng.atomic.twoweek.complete 1296000 1382400
|
||||||
|
|
||||||
|
# This one is retired since it times out all the time. Too few messages.
|
||||||
|
#command[check_datanommer_nuancier]={{libdir}}/nagios/plugins/check_datanommer_timesince.py nuancier 23652000 31536000
|
||||||
|
|
||||||
|
# These are not actually finished and deployed yet
|
||||||
|
command[check_datanommer_mailman]={{libdir}}/nagios/plugins/check_datanommer_timesince.py mailman 14400 86400
|
||||||
|
command[check_datanommer_bugzilla]={{libdir}}/nagios/plugins/check_datanommer_timesince.py bugzilla 86400 259200
|
7
roles/nagios_client/templates/check_disk.cfg.j2
Normal file
7
roles/nagios_client/templates/check_disk.cfg.j2
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# Free-space checks per mount point (-w warning / -c critical thresholds).
# Every command takes the plugin directory from the libdir variable so
# the file stays consistent and does not hard-code /usr/lib64.
command[check_disk_/]={{ libdir }}/nagios/plugins/check_disk -w 14% -c 10% -p /
command[check_disk_/boot]={{ libdir }}/nagios/plugins/check_disk -w 15% -c 10% -p /boot
command[check_disk_/srv/cache/lookaside]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /srv/cache/lookaside
command[check_disk_/srv]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /srv
command[check_disk_/srv/buildmaster]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /srv/buildmaster
command[check_disk_/srv/taskotron]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /srv/taskotron
command[check_disk_/var/log]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 15% -p /var/log
|
63
roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2
Normal file
63
roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
# Fedmsg checks for consumers and producers
|
||||||
|
command[check_fedmsg_cp_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub Nommer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-gateway GatewayConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_anitya_relay]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_app]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_value]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-irc IRCBotConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub GenACLsConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_summershum]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub SummerShumConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FedoraBadgesConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FMNConsumer DigestProducer ConfirmationProducer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py moksha-hub BugzillaConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_fedimg_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FedimgConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_hotness_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub BugzillaTicketFiler MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_bodhi_backend01_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub Masher MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_bodhi_backend02_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub UpdatesHandler MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_autocloud_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub AutoCloudConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_packages_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub CacheInvalidator MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_bugyou_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub BugyouConsumer MonitoringProducer
|
||||||
|
command[check_fedmsg_cp_pdc_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub PDCUpdater MonitoringProducer
|
||||||
|
|
||||||
|
command[check_fedmsg_cexceptions_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub Nommer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-gateway GatewayConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_anitya_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_app]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_value]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-irc IRCBotConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub GenACLsConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_summershum]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub SummerShumConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FedoraBadgesConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FMNConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py moksha-hub BugzillaConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_fedimg_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FedimgConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_hotness_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub BugzillaTicketFiler 1 10
|
||||||
|
command[check_fedmsg_cexceptions_bodhi_backend01_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub Masher 1 10
|
||||||
|
command[check_fedmsg_cexceptions_bodhi_backend02_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub UpdatesHandler 1 10
|
||||||
|
command[check_fedmsg_cexceptions_autocloud_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub AutoCloudConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_packages_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub CacheInvalidator 1 10
|
||||||
|
command[check_fedmsg_cexceptions_bugyou_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub BugyouConsumer 1 10
|
||||||
|
command[check_fedmsg_cexceptions_pdc_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub PDCUpdater 1 10
|
||||||
|
|
||||||
|
command[check_fedmsg_cbacklog_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub Nommer 500 1000
|
||||||
|
command[check_fedmsg_cbacklog_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50
|
||||||
|
command[check_fedmsg_cbacklog_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-gateway GatewayConsumer 10 50
|
||||||
|
command[check_fedmsg_cbacklog_anitya_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50
|
||||||
|
command[check_fedmsg_cbacklog_app]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50
|
||||||
|
command[check_fedmsg_cbacklog_value]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-irc IRCBotConsumer 10 50
|
||||||
|
command[check_fedmsg_cbacklog_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub GenACLsConsumer 10 50
|
||||||
|
command[check_fedmsg_cbacklog_summershum]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub SummerShumConsumer 100 500
|
||||||
|
command[check_fedmsg_cbacklog_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FedoraBadgesConsumer 7000 10000
|
||||||
|
command[check_fedmsg_cbacklog_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FMNConsumer 15000 20000
|
||||||
|
command[check_fedmsg_cbacklog_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py moksha-hub BugzillaConsumer 10 100
|
||||||
|
command[check_fedmsg_cbacklog_fedimg_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FedimgConsumer 2000 5000
|
||||||
|
command[check_fedmsg_cbacklog_hotness_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub BugzillaTicketFiler 1000 5000
|
||||||
|
command[check_fedmsg_cbacklog_bodhi_backend01_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub Masher 500 1000
|
||||||
|
command[check_fedmsg_cbacklog_bodhi_backend02_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub UpdatesHandler 500 1000
|
||||||
|
command[check_fedmsg_cbacklog_autocloud_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub AutoCloudConsumer 100 500
|
||||||
|
command[check_fedmsg_cbacklog_packages_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub CacheInvalidator 20000 30000
|
||||||
|
command[check_fedmsg_cbacklog_bugyou_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub BugyouConsumer 5000 10000
|
||||||
|
command[check_fedmsg_cbacklog_pdc_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub PDCUpdater 10000 20000
|
||||||
|
|
||||||
|
command[check_fedmsg_fmn_digest_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub DigestProducer 90 600
|
||||||
|
command[check_fedmsg_fmn_confirm_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub ConfirmationProducer 90 600
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_fedmsg_gateway_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-gateway' -u fedmsg
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_fedmsg_hub_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-hub' -u fedmsg
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_fedmsg_hub_procs_bugyou]={{ libdir }}/nagios/plugins/check_procs -c 3:3 -C 'fedmsg-hub' -u fedmsg
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_fedmsg_irc_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-irc' -u fedmsg
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_fedmsg_masher_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-hub' -u apache
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_fedmsg_relay_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-relay' -u fedmsg
|
2
roles/nagios_client/templates/check_fmn.cfg.j2
Normal file
2
roles/nagios_client/templates/check_fmn.cfg.j2
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
command[check_fmn_worker_queue]={{ libdir }}/nagios/plugins/check_rabbitmq_size workers 200 1000
|
||||||
|
command[check_fmn_backend_queue]={{ libdir }}/nagios/plugins/check_rabbitmq_size backends 100 200
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_haproxy_conns]=/usr/lib64/nagios/plugins/check_haproxy_conns.py
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_haproxy_mirrorlist]=/usr/lib64/nagios/plugins/check_haproxy_mirrorlist.py
|
1
roles/nagios_client/templates/check_ipa.cfg.j2
Normal file
1
roles/nagios_client/templates/check_ipa.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_ipa_replication]={{ libdir }}/nagios/plugins/check_ipa_replication -u ldaps://localhost/
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_koschei_polling_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-polling -c 1:1
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_koschei_resolver_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-resolve -c 1:1
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_koschei_scheduler_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-schedul -c 1:1
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_koschei_watcher_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-watcher -c 1:1
|
1
roles/nagios_client/templates/check_lock.cfg.j2
Normal file
1
roles/nagios_client/templates/check_lock.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_lock]={{ libdir }}/nagios/plugins/check_lock
|
1
roles/nagios_client/templates/check_lock_file_age.cfg.j2
Normal file
1
roles/nagios_client/templates/check_lock_file_age.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_lock_file_age]={{ libdir }}/nagios/plugins/check_lock_file_age -w 1 -c 5 -f /var/lock/fedora-ca/lock
|
2
roles/nagios_client/templates/check_memcache.cfg.j2
Normal file
2
roles/nagios_client/templates/check_memcache.cfg.j2
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
command[check_memcache]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -a '/usr/bin/memcached' -u memcached
|
||||||
|
command[check_memcache_connect]=/usr/lib64/nagios/plugins/check_memcache_connect
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_merged_file_age]=/usr/lib64/nagios/plugins/check_file_age -w 120 -c 300 /var/log/merged/messages.log
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_mirrorlist_cache]={{ libdir }}/nagios/plugins/check_file_age -w 14400 -c 129600 -f /var/lib/mirrormanager/mirrorlist_cache.pkl
|
1
roles/nagios_client/templates/check_mysql.cfg.j2
Normal file
1
roles/nagios_client/templates/check_mysql.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_mysql_backup]={{ libdir }}/nagios/plugins/check_file_age -w 86400 -c 129600 -f /backups/fpo-mediawiki-latest.xz
|
1
roles/nagios_client/templates/check_openvpn_link.cfg.j2
Normal file
1
roles/nagios_client/templates/check_openvpn_link.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_openvpn_link]={{ libdir }}/nagios/plugins/check_ping -H 192.168.1.41 -w 375.0,20% -c 500,60%
|
2
roles/nagios_client/templates/check_osbs.cfg.j2
Normal file
2
roles/nagios_client/templates/check_osbs.cfg.j2
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
command[check_osbs_builds]={{ libdir }}/nagios/plugins/check_osbs_builds.py
|
||||||
|
command[check_osbs_api]={{ libdir }}/nagios/plugins/check_osbs_api.py
|
1
roles/nagios_client/templates/check_postfix_queue.cfg.j2
Normal file
1
roles/nagios_client/templates/check_postfix_queue.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_postfix_queue]={{ libdir }}/nagios/plugins/check_postfix_queue -w {{ nrpe_check_postfix_queue_warn }} -c {{ nrpe_check_postfix_queue_crit }}
|
1
roles/nagios_client/templates/check_raid.cfg.j2
Normal file
1
roles/nagios_client/templates/check_raid.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_raid]={{ libdir }}/nagios/plugins/check_raid.py
|
1
roles/nagios_client/templates/check_readonly_fs.cfg.j2
Normal file
1
roles/nagios_client/templates/check_readonly_fs.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_readonly_fs]=/usr/lib64/nagios/plugins/check_readonly_fs
|
1
roles/nagios_client/templates/check_redis_proc.cfg.j2
Normal file
1
roles/nagios_client/templates/check_redis_proc.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_redis_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'redis-server' -u redis
|
|
@ -0,0 +1 @@
|
||||||
|
command[check_supybot_fedmsg_plugin]={{libdir}}/nagios/plugins/check_supybot_plugin -t fedmsg
|
1
roles/nagios_client/templates/check_swap.cfg.j2
Normal file
1
roles/nagios_client/templates/check_swap.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_swap]={{ libdir }}/nagios/plugins/check_swap -w 15% -c 10%
|
1
roles/nagios_client/templates/check_testcloud.cfg.j2
Normal file
1
roles/nagios_client/templates/check_testcloud.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_testcloud]={{ libdir }}/nagios/plugins/check_testcloud
|
1
roles/nagios_client/templates/check_unbound_proc.cfg.j2
Normal file
1
roles/nagios_client/templates/check_unbound_proc.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_unbound_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'unbound' -u unbound
|
1
roles/nagios_client/templates/check_varnish_proc.cfg.j2
Normal file
1
roles/nagios_client/templates/check_varnish_proc.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_varnish_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:2 -C 'varnishd' -u varnish
|
228
roles/nagios_client/templates/nrpe.cfg.j2
Normal file
228
roles/nagios_client/templates/nrpe.cfg.j2
Normal file
|
@ -0,0 +1,228 @@
|
||||||
|
#############################################################################
|
||||||
|
# Sample NRPE Config File
|
||||||
|
# Written by: Ethan Galstad (nagios@nagios.org)
|
||||||
|
#
|
||||||
|
# Last Modified: 11-23-2007
|
||||||
|
#
|
||||||
|
# NOTES:
|
||||||
|
# This is a sample configuration file for the NRPE daemon. It needs to be
|
||||||
|
# located on the remote host that is running the NRPE daemon, not the host
|
||||||
|
# from which the check_nrpe client is being executed.
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
# LOG FACILITY
|
||||||
|
# The syslog facility that should be used for logging purposes.
|
||||||
|
|
||||||
|
log_facility=daemon
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# PID FILE
|
||||||
|
# The name of the file in which the NRPE daemon should write its process ID
|
||||||
|
# number. The file is only written if the NRPE daemon is started by the root
|
||||||
|
# user and is running in standalone mode.
|
||||||
|
|
||||||
|
pid_file=/var/run/nrpe/nrpe.pid
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# PORT NUMBER
|
||||||
|
# Port number we should wait for connections on.
|
||||||
|
# NOTE: This must be a non-privileged port (i.e. > 1024).
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
server_port=5666
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# SERVER ADDRESS
|
||||||
|
# Address that nrpe should bind to in case there is more than one interface
|
||||||
|
# and you do not want nrpe to bind on all interfaces.
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
#server_address=127.0.0.1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# NRPE USER
|
||||||
|
# This determines the effective user that the NRPE daemon should run as.
|
||||||
|
# You can either supply a username or a UID.
|
||||||
|
#
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
nrpe_user=nrpe
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# NRPE GROUP
|
||||||
|
# This determines the effective group that the NRPE daemon should run as.
|
||||||
|
# You can either supply a group name or a GID.
|
||||||
|
#
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
nrpe_group=nrpe
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ALLOWED HOST ADDRESSES
|
||||||
|
# This is an optional comma-delimited list of IP addresses or hostnames
|
||||||
|
# that are allowed to talk to the NRPE daemon. Network addresses with a bit mask
|
||||||
|
# (i.e. 192.168.1.0/24) are also supported. Hostname wildcards are not currently
|
||||||
|
# supported.
|
||||||
|
#
|
||||||
|
# Note: The daemon only does rudimentary checking of the client's IP
|
||||||
|
# address. I would highly recommend adding entries in your /etc/hosts.allow
|
||||||
|
# file to allow only the specified host to connect to the port
|
||||||
|
# you are running this daemon on.
|
||||||
|
#
|
||||||
|
# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
|
||||||
|
|
||||||
|
|
||||||
|
allowed_hosts=10.5.126.41,192.168.1.10,192.168.1.20,209.132.181.35
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# COMMAND ARGUMENT PROCESSING
|
||||||
|
# This option determines whether or not the NRPE daemon will allow clients
|
||||||
|
# to specify arguments to commands that are executed. This option only works
|
||||||
|
# if the daemon was configured with the --enable-command-args configure script
|
||||||
|
# option.
|
||||||
|
#
|
||||||
|
# *** ENABLING THIS OPTION IS A SECURITY RISK! ***
|
||||||
|
# Read the SECURITY file for information on some of the security implications
|
||||||
|
# of enabling this variable.
|
||||||
|
#
|
||||||
|
# Values: 0=do not allow arguments, 1=allow command arguments
|
||||||
|
|
||||||
|
dont_blame_nrpe=0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# COMMAND PREFIX
|
||||||
|
# This option allows you to prefix all commands with a user-defined string.
|
||||||
|
# A space is automatically added between the specified prefix string and the
|
||||||
|
# command line from the command definition.
|
||||||
|
#
|
||||||
|
# *** THIS EXAMPLE MAY POSE A POTENTIAL SECURITY RISK, SO USE WITH CAUTION! ***
|
||||||
|
# Usage scenario:
|
||||||
|
# Execute restricted commands using sudo. For this to work, you need to add
|
||||||
|
# the nagios user to your /etc/sudoers. An example entry for allowing
|
||||||
|
# execution of the plugins might be:
|
||||||
|
#
|
||||||
|
# nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/
|
||||||
|
#
|
||||||
|
# This lets the nagios user run all commands in that directory (and only them)
|
||||||
|
# without asking for a password. If you do this, make sure you don't give
|
||||||
|
# random users write access to that directory or its contents!
|
||||||
|
|
||||||
|
# command_prefix=/usr/bin/sudo
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# DEBUGGING OPTION
|
||||||
|
# This option determines whether or not debugging messages are logged to the
|
||||||
|
# syslog facility.
|
||||||
|
# Values: 0=debugging off, 1=debugging on
|
||||||
|
|
||||||
|
debug=0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# COMMAND TIMEOUT
|
||||||
|
# This specifies the maximum number of seconds that the NRPE daemon will
|
||||||
|
# allow plugins to finish executing before killing them off.
|
||||||
|
|
||||||
|
command_timeout=100
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# CONNECTION TIMEOUT
|
||||||
|
# This specifies the maximum number of seconds that the NRPE daemon will
|
||||||
|
# wait for a connection to be established before exiting. This is sometimes
|
||||||
|
# seen where a network problem stops the SSL being established even though
|
||||||
|
# all network sessions are connected. This causes the nrpe daemons to
|
||||||
|
# accumulate, eating system resources. Do not set this too low.
|
||||||
|
|
||||||
|
connection_timeout=300
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# WEAK RANDOM SEED OPTION
|
||||||
|
# This directive allows you to use SSL even if your system does not have
|
||||||
|
# a /dev/random or /dev/urandom (on purpose or because the necessary patches
|
||||||
|
# were not applied). The random number generator will be seeded from a file
|
||||||
|
# which is either a file pointed to by the environment variable $RANDFILE
|
||||||
|
# or $HOME/.rnd. If neither exists, the pseudo random number generator will
|
||||||
|
# be initialized and a warning will be issued.
|
||||||
|
# Values: 0=only seed from /dev/[u]random, 1=also seed from weak randomness
|
||||||
|
|
||||||
|
#allow_weak_random_seed=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# INCLUDE CONFIG FILE
|
||||||
|
# This directive allows you to include definitions from an external config file.
|
||||||
|
|
||||||
|
#include=<somefile.cfg>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# INCLUDE CONFIG DIRECTORY
|
||||||
|
# This directive allows you to include definitions from config files (with a
|
||||||
|
# .cfg extension) in one or more directories (with recursion).
|
||||||
|
|
||||||
|
include_dir=/etc/nrpe.d/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# COMMAND DEFINITIONS
|
||||||
|
# Command definitions that this daemon will run. Definitions
|
||||||
|
# are in the following format:
|
||||||
|
#
|
||||||
|
# command[<command_name>]=<command_line>
|
||||||
|
#
|
||||||
|
# When the daemon receives a request to return the results of <command_name>
|
||||||
|
# it will execute the command specified by the <command_line> argument.
|
||||||
|
#
|
||||||
|
# Unlike Nagios, the command line cannot contain macros - it must be
|
||||||
|
# typed exactly as it should be executed.
|
||||||
|
#
|
||||||
|
# Note: Any plugins that are used in the command lines must reside
|
||||||
|
# on the machine that this daemon is running on! The examples below
|
||||||
|
# assume that you have plugins installed in a /usr/local/nagios/libexec
|
||||||
|
# directory. Also note that you will have to modify the definitions below
|
||||||
|
# to match the argument format the plugins expect. Remember, these are
|
||||||
|
# examples only!
|
||||||
|
|
||||||
|
|
||||||
|
# The following examples use hardcoded command arguments...
|
||||||
|
|
||||||
|
command[check_users]={{ libdir }}/nagios/plugins/check_users -w 5 -c 10
|
||||||
|
command[check_load]={{ libdir }}/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
|
||||||
|
command[check_hda1]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
|
||||||
|
{% if inventory_hostname not in groups['zombie-infested'] %}
|
||||||
|
command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 5 -c 10 -s Z
|
||||||
|
{% else %}
|
||||||
|
# This host is prone to Zombies and we do not care or want to alert on it so we make the limits very high
|
||||||
|
command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 50000 -c 100000 -s Z
|
||||||
|
{% endif %}
|
||||||
|
command[check_total_procs]={{ libdir }}/nagios/plugins/check_procs -w {{ nrpe_procs_warn }} -c {{ nrpe_procs_crit }}
|
||||||
|
|
||||||
|
|
||||||
|
# The following examples allow user-supplied arguments and can
|
||||||
|
# only be used if the NRPE daemon was compiled with support for
|
||||||
|
# command arguments *AND* the dont_blame_nrpe directive in this
|
||||||
|
# config file is set to '1'. This poses a potential security risk, so
|
||||||
|
# make sure you read the SECURITY file before doing this.
|
||||||
|
|
||||||
|
#command[check_users]=/usr/lib64/nagios/plugins/check_users -w $ARG1$ -c $ARG2$
|
||||||
|
#command[check_load]=/usr/lib64/nagios/plugins/check_load -w $ARG1$ -c $ARG2$
|
||||||
|
#command[check_disk]=/usr/lib64/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
|
||||||
|
#command[check_procs]=/usr/lib64/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
|
||||||
|
|
||||||
|
|
||||||
|
# NEVER ADD ANYTHING HERE - ANY ENTRIES TO NRPE SHOULD BE in .cfg files in /etc/nrpe.d/
|
||||||
|
|
||||||
|
# NEVER NEVER NEVER
|
||||||
|
#
|
78
roles/nagios_server/README.rst
Normal file
78
roles/nagios_server/README.rst
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
===================================
|
||||||
|
Nagios 4 Configuration for Fedora
|
||||||
|
===================================
|
||||||
|
|
||||||
|
The Fedora Infrastructure Nagios is built on a set of configurations
|
||||||
|
originally written for Nagios 2 and then upgraded over time to Nagios
|
||||||
|
3 and then 4.08. With additional changes made in the 4.2 series of
|
||||||
|
Nagios this needed a better rewrite as various parts came from
|
||||||
|
pre-puppet and then various puppet modules added on top.
|
||||||
|
|
||||||
|
In order to get this rewrite done, we will use as much of the original
|
||||||
|
layout of the Fedora ansible nagios module but with rewrites to better
|
||||||
|
match current Nagios configurations so that it can be maintained.
|
||||||
|
|
||||||
|
Role directory layout
|
||||||
|
=====================
|
||||||
|
The original layout branched out from
|
||||||
|
|
||||||
|
roles/nagios/client/
|
||||||
|
roles/nagios/server/
|
||||||
|
|
||||||
|
With the usual trees below this. This breaks ansible best practices
|
||||||
|
and how most new modules are set up so the rewrite uses:
|
||||||
|
|
||||||
|
roles/nagios_client/
|
||||||
|
roles/nagios_server/
|
||||||
|
|
||||||
|
=====================
|
||||||
|
Nagios Server Files
|
||||||
|
=====================
|
||||||
|
|
||||||
|
The Nagios Server Files require a large layout change. The original
|
||||||
|
Nagios system used multiple independent modes and files which caused
|
||||||
|
problems when hosts were removed. The new system will use hosts set up
|
||||||
|
from the Fedora Ansible Inventory with hostgroups set up to match
|
||||||
|
groups.
|
||||||
|
|
||||||
|
roles/nagios_server/{files,handlers,tasks,templates}
|
||||||
|
|
||||||
|
r.../n.../files/httpd ==> /etc/httpd/conf.d files
|
||||||
|
r.../n.../files/nagios ==> /etc/nagios/ files
|
||||||
|
r.../n.../files/nagios/commands command files
|
||||||
|
r.../n.../files/nagios/hosts host files
|
||||||
|
r.../n.../files/nagios/hostgroups groups made from hosts
|
||||||
|
r.../n.../files/nagios/services services
|
||||||
|
r.../n.../files/nagios/servicegroups groups made from services
|
||||||
|
r.../n.../files/nagios/contacts files for people
|
||||||
|
r.../n.../files/nagios/contactgroups groups made from contacts
|
||||||
|
|
||||||
|
similar layout for templates
|
||||||
|
handlers has the ways to restart and check configuration
|
||||||
|
tasks has the main rules for building stuff.
|
||||||
|
|
||||||
|
===================
|
||||||
|
Nagios Module Steps
|
||||||
|
===================
|
||||||
|
|
||||||
|
1. Check to see if the nagios user is configured. Someone years ago
|
||||||
|
chose that our monitoring uses UID/GID 420. Har Har.
|
||||||
|
Setup any other groups and permissions
|
||||||
|
2. Install the needed packages for the server.
|
||||||
|
3. Setup the directories on the server
|
||||||
|
/etc/nagios/{child}
|
||||||
|
4. Synchronise over the static files
|
||||||
|
/etc/nagios/commands/
|
||||||
|
/etc/nagios/services/
|
||||||
|
/etc/nagios/servicegroups/
|
||||||
|
/etc/nagios/contacts/
|
||||||
|
/etc/nagios/contactgroups/
|
||||||
|
/usr/lib64/nagios/plugins/
|
||||||
|
/usr/local/bin
|
||||||
|
/usr/share/nagios/html/
|
||||||
|
5. Build template files
|
||||||
|
/etc/nagios/commands/
|
||||||
|
/etc/nagios/hosts/{ansible-inventory, ansible-vars, other}
|
||||||
|
/etc/nagios/hostgroups/
|
||||||
|
6. Fix selinux policy
|
||||||
|
7. Restart services
|
36
roles/nagios_server/files/httpd/nagios.conf
Normal file
36
roles/nagios_server/files/httpd/nagios.conf
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
# noc1
|
||||||
|
ScriptAlias /nagios/cgi-bin/ /usr/lib64/nagios/cgi-bin/
|
||||||
|
|
||||||
|
# noc2
|
||||||
|
ScriptAlias /nagios-external/cgi-bin/ /usr/lib64/nagios/cgi-bin/
|
||||||
|
|
||||||
|
# test
|
||||||
|
ScriptAlias /nagios-just-a-test/cgi-bin/ /usr/lib64/nagios/cgi-bin/
|
||||||
|
|
||||||
|
ScriptAlias /tac.cgi /usr/lib64/nagios/cgi-bin/tac.cgi
|
||||||
|
|
||||||
|
<Location />
|
||||||
|
AuthName "Nagios GSSAPI Login"
|
||||||
|
GssapiCredStore keytab:/etc/krb5.HTTP_admin.fedoraproject.org.keytab
|
||||||
|
AuthType GSSAPI
|
||||||
|
# This is off because Apache (and thus mod_auth_gssapi) doesn't know this is proxied over TLS
|
||||||
|
GssapiSSLonly Off
|
||||||
|
GssapiLocalName on
|
||||||
|
Require valid-user
|
||||||
|
</Location>
|
||||||
|
|
||||||
|
<Location ~ "/(nagios|nagios-external|nagios-just-a-test)/cgi-bin/">
|
||||||
|
Options ExecCGI
|
||||||
|
</Location>
|
||||||
|
|
||||||
|
<Directory "/usr/share/nagios/html">
|
||||||
|
Options None
|
||||||
|
</Directory>
|
||||||
|
|
||||||
|
Alias /nagios /usr/share/nagios/html/
|
||||||
|
|
||||||
|
# This will only affect noc2 because the proxies only forward -external to it.
|
||||||
|
Alias /nagios-external /usr/share/nagios/html/
|
||||||
|
|
||||||
|
# Test
|
||||||
|
Alias /nagios-test /usr/share/nagios/html/
|
8
roles/nagios_server/files/nagios/commands/bzr.cfg
Normal file
8
roles/nagios_server/files/nagios/commands/bzr.cfg
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
# 'check_bzr' command definition
|
||||||
|
# I'd like this to actually interact with BZR, but I can't find any
|
||||||
|
# proper documentation on the protocol to craft send/expect/quit
|
||||||
|
# strings.
|
||||||
|
define command{
|
||||||
|
command_name check_bzr
|
||||||
|
command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p 4155
|
||||||
|
}
|
15
roles/nagios_server/files/nagios/commands/disk.cfg
Normal file
15
roles/nagios_server/files/nagios/commands/disk.cfg
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
define command {
|
||||||
|
command_name check_by_ssh_check_raid
|
||||||
|
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_raid.py"
|
||||||
|
}
|
||||||
|
|
||||||
|
define command {
|
||||||
|
command_name check_by_ssh_check_disk
|
||||||
|
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_disk -w $ARG1$% -c $ARG2$% -p $ARG3$"
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_postgres_conns' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_postgres_conns
|
||||||
|
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_procs -u postgres -w $ARG1$ -c $ARG2$ -a $ARG3$"
|
||||||
|
}
|
11
roles/nagios_server/files/nagios/commands/dns.cfg
Normal file
11
roles/nagios_server/files/nagios/commands/dns.cfg
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
# 'check_dns' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_dns
|
||||||
|
command_line $USER1$/check_dns -H www.yahoo.com -s $HOSTADDRESS$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_dns_fpo' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_dns_fpo
|
||||||
|
command_line $USER1$/check_dns -t 30 -H fedoraproject.org -A -s $HOSTADDRESS$
|
||||||
|
}
|
8
roles/nagios_server/files/nagios/commands/git.cfg
Normal file
8
roles/nagios_server/files/nagios/commands/git.cfg
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
# 'check_git' command definition
|
||||||
|
# I'd like this to actually interact with GIT, but I can't find any
|
||||||
|
# proper documentation on the protocol to craft send/expect/quit
|
||||||
|
# strings.
|
||||||
|
define command{
|
||||||
|
command_name check_git
|
||||||
|
command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p 9418
|
||||||
|
}
|
79
roles/nagios_server/files/nagios/commands/httpd.cfg
Normal file
79
roles/nagios_server/files/nagios/commands/httpd.cfg
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
##
|
||||||
|
## This file has the commands to check and restart general httpd services
|
||||||
|
## and websites.
|
||||||
|
##
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# COMMAND DEFINITIONS
|
||||||
|
#
|
||||||
|
# SYNTAX:
|
||||||
|
#
|
||||||
|
# define command{
|
||||||
|
# template <templatename>
|
||||||
|
# name <objectname>
|
||||||
|
# command_name <commandname>
|
||||||
|
# command_line <commandline>
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# WHERE:
|
||||||
|
#
|
||||||
|
# <templatename> = object name of another command definition that should be
|
||||||
|
# used as a template for this definition (optional)
|
||||||
|
# <objectname> = object name of command definition, referenced by other
|
||||||
|
# command definitions that use it as a template (optional)
|
||||||
|
# <commandname> = name of the command, as recognized/used by Nagios
|
||||||
|
# <commandline> = command line
|
||||||
|
#
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
# 'restart_httpd' command definition
|
||||||
|
define command {
|
||||||
|
command_name restart_httpd
|
||||||
|
command_line $USER1$/restart_httpd $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$ $HOSTADDRESS$ "$HOSTALIAS$" "$SERVICEDESC$" "$SERVICESTATE$"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# 'check_website_publiclist' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_website_publiclist
|
||||||
|
command_line $USER1$/check_http -w 60 -c 80 -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_website' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_website
|
||||||
|
command_line $USER1$/check_http -w 30 -c 40 -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
|
||||||
|
}
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_website_ppc
|
||||||
|
command_line $USER1$/check_http -w 300 -c 400 -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
|
||||||
|
}
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_website_ssl
|
||||||
|
command_line $USER1$/check_http -w 30 -c 40 --ssl -I $HOSTADDRESS$ -H $ARG1$ -u $ARG2$ -s "$ARG3$"
|
||||||
|
}
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_ssl_cert
|
||||||
|
command_line $USER1$/check_http -I $HOSTADDRESS$ -H $ARG1$ -C $ARG2$
|
||||||
|
}
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_website_publiclist_ssl
|
||||||
|
command_line $USER1$/check_http -w 40 -c 60 --ssl -I $HOSTADDRESS$ -H $ARG1$ -u $ARG2$ -s "$ARG3$"
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_http' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_http
|
||||||
|
command_line $USER1$/check_http -H $HOSTADDRESS$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_https' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_https
|
||||||
|
command_line $USER1$/check_http -H $HOSTADDRESS$ --ssl
|
||||||
|
}
|
29
roles/nagios_server/files/nagios/commands/koji.cfg
Normal file
29
roles/nagios_server/files/nagios/commands/koji.cfg
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
################################################################################
|
||||||
|
# COMMAND DEFINITIONS
|
||||||
|
#
|
||||||
|
# SYNTAX:
|
||||||
|
#
|
||||||
|
# define command{
|
||||||
|
# template <templatename>
|
||||||
|
# name <objectname>
|
||||||
|
# command_name <commandname>
|
||||||
|
# command_line <commandline>
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# WHERE:
|
||||||
|
#
|
||||||
|
# <templatename> = object name of another command definition that should be
|
||||||
|
# used as a template for this definition (optional)
|
||||||
|
# <objectname> = object name of command definition, referenced by other
|
||||||
|
# command definitions that use it as a template (optional)
|
||||||
|
# <commandname> = name of the command, as recognized/used by Nagios
|
||||||
|
# <commandline> = command line
|
||||||
|
#
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
# 'check_koji'
|
||||||
|
define command{
|
||||||
|
command_name check_koji
|
||||||
|
command_line $USER1$/check_koji
|
||||||
|
}
|
||||||
|
|
36
roles/nagios_server/files/nagios/commands/local.cfg
Normal file
36
roles/nagios_server/files/nagios/commands/local.cfg
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
# 'check_local_disk' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_local_disk
|
||||||
|
command_line $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_local_load' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_local_load
|
||||||
|
command_line $USER1$/check_load -w $ARG1$ -c $ARG2$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_local_procs' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_local_procs
|
||||||
|
command_line $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_local_users' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_local_users
|
||||||
|
command_line $USER1$/check_users -w $ARG1$ -c $ARG2$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_local_swap' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_local_swap
|
||||||
|
command_line $USER1$/check_swap -w $ARG1$ -c $ARG2$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_local_mrtgtraf' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_local_mrtgtraf
|
||||||
|
command_line $USER1$/check_mrtgtraf -F $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$
|
||||||
|
}
|
||||||
|
|
96
roles/nagios_server/files/nagios/commands/misc.cfg
Normal file
96
roles/nagios_server/files/nagios/commands/misc.cfg
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
################################################################################
|
||||||
|
# COMMAND DEFINITIONS
|
||||||
|
#
|
||||||
|
# SYNTAX:
|
||||||
|
#
|
||||||
|
# define command{
|
||||||
|
# template <templatename>
|
||||||
|
# name <objectname>
|
||||||
|
# command_name <commandname>
|
||||||
|
# command_line <commandline>
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# WHERE:
|
||||||
|
#
|
||||||
|
# <templatename> = object name of another command definition that should be
|
||||||
|
# used as a template for this definition (optional)
|
||||||
|
# <objectname> = object name of command definition, referenced by other
|
||||||
|
# command definitions that use it as a template (optional)
|
||||||
|
# <commandname> = name of the command, as recognized/used by Nagios
|
||||||
|
# <commandline> = command line
|
||||||
|
#
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name true
|
||||||
|
command_line /bin/true
|
||||||
|
}
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_dummy
|
||||||
|
command_line $USER1$/check_dummy $ARG1$ $ARG2$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_tape'
|
||||||
|
define command{
|
||||||
|
command_name check_tape
|
||||||
|
command_line $USER1$/check_tape
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_ftp' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_ftp
|
||||||
|
command_line $USER1$/check_ftp -H $HOSTADDRESS$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'check_hpjd' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_hpjd
|
||||||
|
command_line $USER1$/check_hpjd -H $HOSTADDRESS$ -C public
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_snmp' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_snmp
|
||||||
|
command_line $USER1$/check_snmp -H $HOSTADDRESS$ $ARG1$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'check_nntp' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_nntp
|
||||||
|
command_line $USER1$/check_nntp -H $HOSTADDRESS$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'check_telnet' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_telnet
|
||||||
|
command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p 23
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_dhcp' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_dhcp
|
||||||
|
command_line $USER1$/check_dhcp $ARG1$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_pop' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_pop
|
||||||
|
command_line $USER1$/check_pop -H $HOSTADDRESS$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_imap' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_imap
|
||||||
|
command_line $USER1$/check_imap -H $HOSTADDRESS$ $ARG1$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_nt' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_nt
|
||||||
|
command_line $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$
|
||||||
|
}
|
||||||
|
|
87
roles/nagios_server/files/nagios/commands/notify.cfg
Normal file
87
roles/nagios_server/files/nagios/commands/notify.cfg
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
################################################################################
|
||||||
|
#
|
||||||
|
# SAMPLE NOTIFICATION COMMANDS
|
||||||
|
#
|
||||||
|
# These are some example notification commands. They may or may not work on
|
||||||
|
# your system without modification. As an example, some systems will require
|
||||||
|
# you to use "/usr/bin/mailx" instead of "/usr/bin/mail" in the commands below.
|
||||||
|
#
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
# 'host-notify-by-email' command definition
|
||||||
|
define command{
|
||||||
|
command_name host-notify-by-email
|
||||||
|
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\nSource: $$(hostname)\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'notify-service-by-email' command definition
|
||||||
|
define command{
|
||||||
|
command_name notify-service-by-email
|
||||||
|
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /usr/bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'notify-by-epager' command definition
|
||||||
|
define command{
|
||||||
|
command_name notify-by-epager
|
||||||
|
command_line /usr/bin/printf "%b" "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nInfo: $SERVICEOUTPUT$\nSource: $$(hostname -s)\nDate: $LONGDATETIME$" | /bin/mail -s "$NOTIFICATIONTYPE$: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$" $CONTACTPAGER$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'host-notify-by-epager' command definition
|
||||||
|
define command{
|
||||||
|
command_name host-notify-by-epager
|
||||||
|
command_line /usr/bin/printf "%b" "Host '$HOSTALIAS$' is $HOSTSTATE$\nInfo: $HOSTOUTPUT$\nSource: $$(hostname -s)\nTime: $LONGDATETIME$" | /bin/mail -s "$NOTIFICATIONTYPE$ alert - Host $HOSTNAME$ is $HOSTSTATE$" $CONTACTPAGER$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'host-notify-by-ircbot' command definition
|
||||||
|
define command{
|
||||||
|
command_name host-notify-by-ircbot
|
||||||
|
command_line /usr/bin/printf "%b" "#fedora-noc $NOTIFICATIONTYPE$ - $HOSTALIAS$ is $HOSTSTATE$: $HOSTOUTPUT$ ($$(hostname -s)) $HOSTACKAUTHOR$ $SERVICEACKAUTHOR$" | /usr/local/bin/irc-colorize.py | nc -w 1 value01 5050
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'notify-by-email' command definition
|
||||||
|
define command{
|
||||||
|
command_name notify-by-email
|
||||||
|
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\nSource: $$(hostname)\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'notify-by-ircbot' command definition
|
||||||
|
define command{
|
||||||
|
command_name notify-by-ircbot
|
||||||
|
command_line /usr/bin/printf "%b" "#fedora-noc $NOTIFICATIONTYPE$ - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$: $SERVICEOUTPUT$ ($$(hostname -s)) $HOSTACKAUTHOR$ $SERVICEACKAUTHOR$" | /usr/local/bin/irc-colorize.py | nc -w 1 value01 5050
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'host-notify-by-fedmsg' command definition
|
||||||
|
define command{
|
||||||
|
command_name host-notify-by-fedmsg
|
||||||
|
command_line /usr/bin/echo '{"type": "$NOTIFICATIONTYPE$", "host": "$HOSTALIAS$", "state": "$HOSTSTATE$", "output": "$HOSTOUTPUT$", "host_ack_author": "$HOSTACKAUTHOR$", "service_ack_author": "$SERVICEACKAUTHOR$"}' | fedmsg-logger --cert-prefix nagios --modname nagios --topic host.state.change --json-input
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'notify-by-epager' command definition (NOTE: duplicates the identical 'notify-by-epager' definition earlier in this file)
|
||||||
|
define command{
|
||||||
|
command_name notify-by-epager
|
||||||
|
command_line /usr/bin/printf "%b" "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nInfo: $SERVICEOUTPUT$\nSource: $$(hostname -s)\nDate: $LONGDATETIME$" | /bin/mail -s "$NOTIFICATIONTYPE$: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$" $CONTACTPAGER$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'notify-by-fedmsg' command definition
|
||||||
|
define command{
|
||||||
|
command_name notify-by-fedmsg
|
||||||
|
command_line /usr/bin/echo '{"type": "$NOTIFICATIONTYPE$", "host": "$HOSTALIAS$", "state": "$SERVICESTATE$", "service": "$SERVICEDESC$", "output": "$SERVICEOUTPUT$", "host_ack_author": "$HOSTACKAUTHOR$", "service_ack_author": "$SERVICEACKAUTHOR$"}' | fedmsg-logger --cert-prefix nagios --modname nagios --topic service.state.change --json-input
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'notify-by-xmpp' command definition
|
||||||
|
define command{
|
||||||
|
command_name notify-by-xmpp
|
||||||
|
command_line /usr/local/bin/xmppsend -a /etc/nagios/private/xmppnagios.ini "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nInfo: $SERVICEOUTPUT$\nDate: $LONGDATETIME$" $CONTACTEMAIL$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'host-notify-by-xmpp' command definition
|
||||||
|
define command{
|
||||||
|
command_name host-notify-by-xmpp
|
||||||
|
command_line /usr/local/bin/xmppsend -a /etc/nagios/private/xmppnagios.ini "Host '$HOSTALIAS$' is $HOSTSTATE$\nInfo: $HOSTOUTPUT$\nDate: $LONGDATETIME$" $CONTACTEMAIL$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
17
roles/nagios_server/files/nagios/commands/nrpe.cfg
Normal file
17
roles/nagios_server/files/nagios/commands/nrpe.cfg
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
# 'test nrpe'
|
||||||
|
define command{
|
||||||
|
command_name test_nrpe
|
||||||
|
command_line $USER1$/check_nrpe -t 30 -H $HOSTADDRESS$
|
||||||
|
|
||||||
|
}
|
||||||
|
# 'check by nrpe'
|
||||||
|
define command{
|
||||||
|
command_name check_by_nrpe
|
||||||
|
command_line $USER1$/check_nrpe -t 30 -H $HOSTADDRESS$ -c $ARG1$
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check-host-alive-nrpe' is better for hosts that are on vpn.
|
||||||
|
define command{
|
||||||
|
command_name check-host-alive-nrpe
|
||||||
|
command_line $USER1$/check_nrpe -t 30 -H $HOSTADDRESS$
|
||||||
|
}
|
26
roles/nagios_server/files/nagios/commands/perfdata.cfg
Normal file
26
roles/nagios_server/files/nagios/commands/perfdata.cfg
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
################################################################################
|
||||||
|
#
|
||||||
|
# SAMPLE PERFORMANCE DATA COMMANDS
|
||||||
|
#
|
||||||
|
# These are sample performance data commands that can be used to send performance
|
||||||
|
# data output to two text files (one for hosts, another for services). If you
|
||||||
|
# plan on simply writing performance data out to a file, consider using the
|
||||||
|
# host_perfdata_file and service_perfdata_file options in the main config file.
|
||||||
|
#
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
# 'process-host-perfdata' command definition
|
||||||
|
define command{
|
||||||
|
command_name process-host-perfdata
|
||||||
|
command_line /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /var/log/nagios/host-perfdata.out
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'process-service-perfdata' command definition
|
||||||
|
define command{
|
||||||
|
command_name process-service-perfdata
|
||||||
|
command_line /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /var/log/nagios/service-perfdata.out
|
||||||
|
}
|
||||||
|
|
||||||
|
|
31
roles/nagios_server/files/nagios/commands/ping.cfg
Normal file
31
roles/nagios_server/files/nagios/commands/ping.cfg
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
# This command checks to see if a host is "alive" by pinging it
|
||||||
|
# The check must result in a 100% packet loss or 5 second (5000ms) round trip
|
||||||
|
# average time to produce a critical error.
|
||||||
|
# Note: check-host-alive sends five ICMP echo packets ('-p 5'); the -4/-6 variants send two ('-p 2')
|
||||||
|
|
||||||
|
# 'check-host-alive' command definition
|
||||||
|
define command{
|
||||||
|
command_name check-host-alive
|
||||||
|
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
|
||||||
|
}
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check-host-alive4
|
||||||
|
command_line $USER1$/check_ping -4 -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 2
|
||||||
|
}
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check-host-alive6
|
||||||
|
command_line $USER1$/check_ping -6 -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 2
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_ping4' and 'check_ping6' command definitions
|
||||||
|
define command{
|
||||||
|
command_name check_ping4
|
||||||
|
command_line $USER1$/check_ping -4 -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
|
||||||
|
}
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_ping6
|
||||||
|
command_line $USER1$/check_ping -6 -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
|
||||||
|
}
|
5
roles/nagios_server/files/nagios/commands/postgres.cfg
Normal file
5
roles/nagios_server/files/nagios/commands/postgres.cfg
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# 'pgsql'
|
||||||
|
define command{
|
||||||
|
command_name check_pgsql
|
||||||
|
command_line $USER1$/check_pgsql -H $HOSTADDRESS$ -d $ARG1$ -p '{{nagios_db_user_password}}' --logname 'nagiosuser'
|
||||||
|
}
|
28
roles/nagios_server/files/nagios/commands/rsyslog.cfg
Normal file
28
roles/nagios_server/files/nagios/commands/rsyslog.cfg
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
################################################################################
|
||||||
|
# COMMAND DEFINITIONS
|
||||||
|
#
|
||||||
|
# SYNTAX:
|
||||||
|
#
|
||||||
|
# define command{
|
||||||
|
# template <templatename>
|
||||||
|
# name <objectname>
|
||||||
|
# command_name <commandname>
|
||||||
|
# command_line <commandline>
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# WHERE:
|
||||||
|
#
|
||||||
|
# <templatename> = object name of another command definition that should be
|
||||||
|
# used as a template for this definition (optional)
|
||||||
|
# <objectname> = object name of command definition, referenced by other
|
||||||
|
# command definitions that use it as a template (optional)
|
||||||
|
# <commandname> = name of the command, as recognized/used by Nagios
|
||||||
|
# <commandline> = command line
|
||||||
|
#
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
define command {
|
||||||
|
command_name restart_rsyslog
|
||||||
|
command_line $USER1$/restart_rsyslog $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$ $HOSTADDRESS$ "$HOSTALIAS$" "$SERVICEDESC$" "$SERVICESTATE$"
|
||||||
|
}
|
12
roles/nagios_server/files/nagios/commands/smtp.cfg
Normal file
12
roles/nagios_server/files/nagios/commands/smtp.cfg
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
# 'check_smtp' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_smtp
|
||||||
|
command_line $USER1$/check_smtp -H $HOSTADDRESS$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'check_email_delivery' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_email_delivery
|
||||||
|
command_line $USER1$/check_email_delivery_epn -H $ARG1$ --mailto $ARG2$ --mailfrom $ARG3$ --username $ARG4$ --password $ARG5$ -w $ARG6$ -c $ARG7$
|
||||||
|
}
|
22
roles/nagios_server/files/nagios/commands/ssh.cfg
Normal file
22
roles/nagios_server/files/nagios/commands/ssh.cfg
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
# 'check_ssh' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_ssh
|
||||||
|
command_line $USER1$/check_ssh -H $HOSTADDRESS$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
define command {
|
||||||
|
command_name check_by_ssh_check_raid
|
||||||
|
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_raid.py"
|
||||||
|
}
|
||||||
|
|
||||||
|
define command {
|
||||||
|
command_name check_by_ssh_check_disk
|
||||||
|
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_disk -w $ARG1$% -c $ARG2$% -p $ARG3$"
|
||||||
|
}
|
||||||
|
|
||||||
|
# 'check_postgres_conns' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_postgres_conns
|
||||||
|
command_line $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_procs -u postgres -w $ARG1$ -c $ARG2$ -a $ARG3$"
|
||||||
|
}
|
6
roles/nagios_server/files/nagios/commands/tcp.cfg
Normal file
6
roles/nagios_server/files/nagios/commands/tcp.cfg
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
|
||||||
|
# 'check_tcp' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_tcp
|
||||||
|
command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$
|
||||||
|
}
|
5
roles/nagios_server/files/nagios/commands/testcloud.cfg
Normal file
5
roles/nagios_server/files/nagios/commands/testcloud.cfg
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# 'check_testcloud'
|
||||||
|
define command{
|
||||||
|
command_name check_testcloud
|
||||||
|
command_line $USER1$/check_testcloud
|
||||||
|
}
|
5
roles/nagios_server/files/nagios/commands/udp.cfg
Normal file
5
roles/nagios_server/files/nagios/commands/udp.cfg
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# 'check_udp' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_udp
|
||||||
|
command_line $USER1$/check_udp -H $HOSTADDRESS$ -p $ARG1$
|
||||||
|
}
|
12
roles/nagios_server/files/nagios/commands/unbound.cfg
Normal file
12
roles/nagios_server/files/nagios/commands/unbound.cfg
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
# 'check_unbound_80' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_unbound_80
|
||||||
|
command_line $USER1$/check_dig -H $HOSTADDRESS$ -w 5 -c 9 -p 80 -l $ARG1$ -A "+tcp"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'check_unbound_443' command definition
|
||||||
|
define command{
|
||||||
|
command_name check_unbound_443
|
||||||
|
command_line $USER1$/check_dig_ssl -H $HOSTADDRESS$ -w 5 -c 9 -p 443 -L $ARG1$ -l $ARG2$ -A "+tcp"
|
||||||
|
}
|
22
roles/nagios_server/files/nagios/configs/escalations.cfg
Normal file
22
roles/nagios_server/files/nagios/configs/escalations.cfg
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
define hostescalation{
|
||||||
|
host_name *
|
||||||
|
hostgroup_name *
|
||||||
|
contact_groups fedora-sysadmin-email,fedora-sysadmin-emergency,fedora-sysadmin-ircbot
|
||||||
|
first_notification 2
|
||||||
|
last_notification 0
|
||||||
|
notification_interval 60
|
||||||
|
escalation_period 24x7
|
||||||
|
escalation_options d,u,r
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
define serviceescalation{
|
||||||
|
host_name *
|
||||||
|
service_description *
|
||||||
|
contact_groups fedora-sysadmin-email,fedora-sysadmin-emergency,fedora-sysadmin-ircbot
|
||||||
|
first_notification 2
|
||||||
|
last_notification 0
|
||||||
|
notification_interval 60
|
||||||
|
escalation_period 24x7
|
||||||
|
escalation_options w,u,c,r
|
||||||
|
}
|
362
roles/nagios_server/files/nagios/configs/minimal.cfg
Normal file
362
roles/nagios_server/files/nagios/configs/minimal.cfg
Normal file
|
@ -0,0 +1,362 @@
|
||||||
|
###############################################################################
|
||||||
|
# MINIMAL.CFG
|
||||||
|
#
|
||||||
|
# MINIMALISTIC OBJECT CONFIG FILE (Template-Based Object File Format)
|
||||||
|
#
|
||||||
|
# Last Modified: 08-10-2005
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# NOTE: This config file is intended to be used to test a Nagios installation
|
||||||
|
# that has been compiled with support for the template-based object
|
||||||
|
# configuration files.
|
||||||
|
#
|
||||||
|
# This config file is intended to serve as an *extremely* simple
|
||||||
|
# example of how you can create your object configuration file(s).
|
||||||
|
# If you're interested in more complex object configuration files for
|
||||||
|
# Nagios, look in the sample-config/template-object/ subdirectory of
|
||||||
|
# the distribution.
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# TIME PERIODS
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# This defines a timeperiod where all times are valid for checks,
|
||||||
|
# notifications, etc. The classic "24x7" support nightmare. :-)
|
||||||
|
|
||||||
|
define timeperiod{
|
||||||
|
timeperiod_name 24x7
|
||||||
|
alias 24 Hours A Day, 7 Days A Week
|
||||||
|
sunday 00:00-24:00
|
||||||
|
monday 00:00-24:00
|
||||||
|
tuesday 00:00-24:00
|
||||||
|
wednesday 00:00-24:00
|
||||||
|
thursday 00:00-24:00
|
||||||
|
friday 00:00-24:00
|
||||||
|
saturday 00:00-24:00
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# COMMANDS
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# This is a sample service notification command that can be used to send email
|
||||||
|
# notifications (about service alerts) to contacts.
|
||||||
|
# 'notify-by-email' command definition
|
||||||
|
define command{
|
||||||
|
command_name notify-by-email
|
||||||
|
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$OUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# This is a sample host notification command that can be used to send email
|
||||||
|
# notifications (about host alerts) to contacts.
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name host-notify-by-email
|
||||||
|
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $OUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Command to check to see if a host is "alive" (up) by pinging it
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check-host-alive
|
||||||
|
command_line $USER1$/check_ping -4 -H $HOSTADDRESS$ -w 300,99% -c 500,100% -p 2
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Generic command to check a device by pinging it
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_ping
|
||||||
|
command_line $USER1$/check_ping -4 -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Command used to check disk space usage on local partitions
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_local_disk
|
||||||
|
command_line $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Command used to check the number of currently logged in users on the
|
||||||
|
# local machine
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_local_users
|
||||||
|
command_line $USER1$/check_users -w $ARG1$ -c $ARG2$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Command to check the number of running processes on the local machine
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_local_procs
|
||||||
|
command_line $USER1$/check_procs -w $ARG1$ -c $ARG2$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Command to check the load on the local machine
|
||||||
|
|
||||||
|
define command{
|
||||||
|
command_name check_local_load
|
||||||
|
command_line $USER1$/check_load -w $ARG1$ -c $ARG2$
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# CONTACTS
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# In this simple config file, a single contact will receive all alerts.
|
||||||
|
# This assumes that you have an account (or email alias) called
|
||||||
|
# "nagios-admin" on the local host.
|
||||||
|
|
||||||
|
define contact{
|
||||||
|
contact_name nagios-admin
|
||||||
|
alias Nagios Admin
|
||||||
|
service_notification_period 24x7
|
||||||
|
host_notification_period 24x7
|
||||||
|
service_notification_options w,u,c,r
|
||||||
|
host_notification_options d,r
|
||||||
|
service_notification_commands notify-by-email
|
||||||
|
host_notification_commands host-notify-by-email
|
||||||
|
email admin@fedoraproject.org
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# CONTACT GROUPS
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# We only have one contact in this simple configuration file, so there is
|
||||||
|
# no need to create more than one contact group.
|
||||||
|
|
||||||
|
define contactgroup{
|
||||||
|
contactgroup_name admins
|
||||||
|
alias Nagios Administrators
|
||||||
|
members nagios-admin
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# HOSTS
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# Generic host definition template - This is NOT a real host, just a template!
|
||||||
|
|
||||||
|
define host{
|
||||||
|
name generic-host ; The name of this host template
|
||||||
|
notifications_enabled 1 ; Host notifications are enabled
|
||||||
|
event_handler_enabled 1 ; Host event handler is enabled
|
||||||
|
flap_detection_enabled 1 ; Flap detection is enabled
|
||||||
|
failure_prediction_enabled 1 ; Failure prediction is enabled
|
||||||
|
process_perf_data 1 ; Process performance data
|
||||||
|
retain_status_information 1 ; Retain status information across program restarts
|
||||||
|
retain_nonstatus_information 1 ; Retain non-status information across program restarts
|
||||||
|
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Since this is a simple configuration file, we only monitor one host - the
|
||||||
|
# local host (this machine).
|
||||||
|
|
||||||
|
define host{
|
||||||
|
use generic-host ; Name of host template to use
|
||||||
|
host_name localhost
|
||||||
|
alias localhost
|
||||||
|
address 127.0.0.1
|
||||||
|
check_command check-host-alive
|
||||||
|
max_check_attempts 10
|
||||||
|
notification_interval 120
|
||||||
|
notification_period 24x7
|
||||||
|
notification_options d,r
|
||||||
|
contact_groups admins
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# HOST GROUPS
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# We only have one host in our simple config file, so there is no need to
|
||||||
|
# create more than one hostgroup.
|
||||||
|
|
||||||
|
define hostgroup{
|
||||||
|
hostgroup_name test
|
||||||
|
alias Test Servers
|
||||||
|
members localhost
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# SERVICES
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# Generic service definition template - This is NOT a real service, just a template!
|
||||||
|
|
||||||
|
define service{
|
||||||
|
name generic-service ; The 'name' of this service template
|
||||||
|
active_checks_enabled 1 ; Active service checks are enabled
|
||||||
|
passive_checks_enabled 1 ; Passive service checks are enabled/accepted
|
||||||
|
parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems)
|
||||||
|
obsess_over_service 1 ; We should obsess over this service (if necessary)
|
||||||
|
check_freshness 0 ; Default is to NOT check service 'freshness'
|
||||||
|
notifications_enabled 1 ; Service notifications are enabled
|
||||||
|
event_handler_enabled 1 ; Service event handler is enabled
|
||||||
|
flap_detection_enabled 1 ; Flap detection is enabled
|
||||||
|
failure_prediction_enabled 1 ; Failure prediction is enabled
|
||||||
|
process_perf_data 1 ; Process performance data
|
||||||
|
retain_status_information 1 ; Retain status information across program restarts
|
||||||
|
retain_nonstatus_information 1 ; Retain non-status information across program restarts
|
||||||
|
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Define a service to "ping" the local machine
|
||||||
|
|
||||||
|
define service{
|
||||||
|
use generic-service ; Name of service template to use
|
||||||
|
host_name localhost
|
||||||
|
service_description PING
|
||||||
|
is_volatile 0
|
||||||
|
check_period 24x7
|
||||||
|
max_check_attempts 4
|
||||||
|
normal_check_interval 5
|
||||||
|
retry_check_interval 1
|
||||||
|
contact_groups admins
|
||||||
|
notification_options w,u,c,r
|
||||||
|
notification_interval 960
|
||||||
|
notification_period 24x7
|
||||||
|
check_command check_ping!100.0,20%!500.0,60%
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Define a service to check the disk space of the root partition
|
||||||
|
# on the local machine. Warning if < 20% free, critical if
|
||||||
|
# < 10% free space on partition.
|
||||||
|
|
||||||
|
define service{
|
||||||
|
use generic-service ; Name of service template to use
|
||||||
|
host_name localhost
|
||||||
|
service_description Root Partition
|
||||||
|
is_volatile 0
|
||||||
|
check_period 24x7
|
||||||
|
max_check_attempts 4
|
||||||
|
normal_check_interval 5
|
||||||
|
retry_check_interval 1
|
||||||
|
contact_groups admins
|
||||||
|
notification_options w,u,c,r
|
||||||
|
notification_interval 960
|
||||||
|
notification_period 24x7
|
||||||
|
check_command check_local_disk!20%!10%!/
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Define a service to check the number of currently logged in
|
||||||
|
# users on the local machine. Warning if > 20 users, critical
|
||||||
|
# if > 50 users.
|
||||||
|
|
||||||
|
define service{
|
||||||
|
use generic-service ; Name of service template to use
|
||||||
|
host_name localhost
|
||||||
|
service_description Current Users
|
||||||
|
is_volatile 0
|
||||||
|
check_period 24x7
|
||||||
|
max_check_attempts 4
|
||||||
|
normal_check_interval 5
|
||||||
|
retry_check_interval 1
|
||||||
|
contact_groups admins
|
||||||
|
notification_options w,u,c,r
|
||||||
|
notification_interval 960
|
||||||
|
notification_period 24x7
|
||||||
|
check_command check_local_users!20!50
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Define a service to check the number of currently running procs
|
||||||
|
# on the local machine. Warning if > 250 processes, critical if
|
||||||
|
# > 400 processes.
|
||||||
|
|
||||||
|
define service{
|
||||||
|
use generic-service ; Name of service template to use
|
||||||
|
host_name localhost
|
||||||
|
service_description Total Processes
|
||||||
|
is_volatile 0
|
||||||
|
check_period 24x7
|
||||||
|
max_check_attempts 4
|
||||||
|
normal_check_interval 5
|
||||||
|
retry_check_interval 1
|
||||||
|
contact_groups admins
|
||||||
|
notification_options w,u,c,r
|
||||||
|
notification_interval 960
|
||||||
|
notification_period 24x7
|
||||||
|
check_command check_local_procs!250!400
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Define a service to check the load on the local machine.
|
||||||
|
|
||||||
|
define service{
|
||||||
|
use generic-service ; Name of service template to use
|
||||||
|
host_name localhost
|
||||||
|
service_description Current Load
|
||||||
|
is_volatile 0
|
||||||
|
check_period 24x7
|
||||||
|
max_check_attempts 4
|
||||||
|
normal_check_interval 5
|
||||||
|
retry_check_interval 1
|
||||||
|
contact_groups admins
|
||||||
|
notification_options w,u,c,r
|
||||||
|
notification_interval 960
|
||||||
|
notification_period 24x7
|
||||||
|
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# EOF
|
1349
roles/nagios_server/files/nagios/configs/nagios.cfg
Normal file
1349
roles/nagios_server/files/nagios/configs/nagios.cfg
Normal file
File diff suppressed because it is too large
Load diff
135
roles/nagios_server/files/nagios/configs/timeperiods.cfg
Normal file
135
roles/nagios_server/files/nagios/configs/timeperiods.cfg
Normal file
|
@ -0,0 +1,135 @@
|
||||||
|
###############################################################################
|
||||||
|
# TIMEPERIODS.CFG - SAMPLE TIMEPERIOD DEFINITIONS
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# NOTES: This config file provides you with some example timeperiod definitions
|
||||||
|
# that you can reference in host, service, contact, and dependency
|
||||||
|
# definitions.
|
||||||
|
#
|
||||||
|
# You don't need to keep timeperiods in a separate file from your other
|
||||||
|
# object definitions. This has been done just to make things easier to
|
||||||
|
# understand.
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
#
|
||||||
|
# TIME PERIODS
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
define timeperiod{
|
||||||
|
timeperiod_name 24x7
|
||||||
|
alias 24 Hours A Day, 7 Days A Week
|
||||||
|
sunday 00:00-24:00
|
||||||
|
monday 00:00-24:00
|
||||||
|
tuesday 00:00-24:00
|
||||||
|
wednesday 00:00-24:00
|
||||||
|
thursday 00:00-24:00
|
||||||
|
friday 00:00-24:00
|
||||||
|
saturday 00:00-24:00
|
||||||
|
}
|
||||||
|
|
||||||
|
define timeperiod{
|
||||||
|
timeperiod_name 16x7
|
||||||
|
alias 15 Hours a day, 7 days a week
|
||||||
|
sunday 00:00-04:00,13:00-24:00
|
||||||
|
monday 00:00-04:00,13:00-24:00
|
||||||
|
tuesday 00:00-04:00,13:00-24:00
|
||||||
|
wednesday 00:00-04:00,13:00-24:00
|
||||||
|
thursday 00:00-04:00,13:00-24:00
|
||||||
|
friday 00:00-04:00,13:00-24:00
|
||||||
|
saturday 00:00-04:00,13:00-24:00
|
||||||
|
}
|
||||||
|
|
||||||
|
define timeperiod{
|
||||||
|
timeperiod_name 16x7-AU
|
||||||
|
alias 15 Hours a day, 7 days a week
|
||||||
|
sunday 00:00-14:00,22:00-24:00
|
||||||
|
monday 00:00-14:00,22:00-24:00
|
||||||
|
tuesday 00:00-14:00,22:00-24:00
|
||||||
|
wednesday 00:00-14:00,22:00-24:00
|
||||||
|
thursday 00:00-14:00,22:00-24:00
|
||||||
|
friday 00:00-14:00,22:00-24:00
|
||||||
|
saturday 00:00-14:00,22:00-24:00
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Members of sysadmin-main already get nagios messages
|
||||||
|
define timeperiod{
|
||||||
|
timeperiod_name never
|
||||||
|
alias Never
|
||||||
|
}
|
||||||
|
|
||||||
|
# This defines a timeperiod where all times are valid for checks,
|
||||||
|
# notifications, etc. The classic "24x7" support nightmare. :-)
|
||||||
|
define timeperiod{
|
||||||
|
timeperiod_name 24x7
|
||||||
|
alias 24 Hours A Day, 7 Days A Week
|
||||||
|
sunday 00:00-24:00
|
||||||
|
monday 00:00-24:00
|
||||||
|
tuesday 00:00-24:00
|
||||||
|
wednesday 00:00-24:00
|
||||||
|
thursday 00:00-24:00
|
||||||
|
friday 00:00-24:00
|
||||||
|
saturday 00:00-24:00
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'workhours' timeperiod definition
|
||||||
|
define timeperiod{
|
||||||
|
timeperiod_name workhours
|
||||||
|
alias Normal Work Hours
|
||||||
|
monday 09:00-17:00
|
||||||
|
tuesday 09:00-17:00
|
||||||
|
wednesday 09:00-17:00
|
||||||
|
thursday 09:00-17:00
|
||||||
|
friday 09:00-17:00
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 'none' timeperiod definition
|
||||||
|
define timeperiod{
|
||||||
|
timeperiod_name none
|
||||||
|
alias No Time Is A Good Time
|
||||||
|
}
|
||||||
|
|
||||||
|
# Some U.S. holidays
|
||||||
|
# Note: The timeranges for each holiday are meant to *exclude* the holidays from being
|
||||||
|
# treated as a valid time for notifications, etc. You probably don't want your pager
|
||||||
|
# going off on New Year's. Although your employer might... :-)
|
||||||
|
define timeperiod{
|
||||||
|
name us-holidays
|
||||||
|
timeperiod_name us-holidays
|
||||||
|
alias U.S. Holidays
|
||||||
|
|
||||||
|
january 1 00:00-00:00 ; New Years
|
||||||
|
monday -1 may 00:00-00:00 ; Memorial Day (last Monday in May)
|
||||||
|
july 4 00:00-00:00 ; Independence Day
|
||||||
|
monday 1 september 00:00-00:00 ; Labor Day (first Monday in September)
|
||||||
|
thursday 4 november 00:00-00:00 ; Thanksgiving (4th Thursday in November)
|
||||||
|
december 25 00:00-00:00 ; Christmas
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# This defines a modified "24x7" timeperiod that covers every day of the
|
||||||
|
# year, except for U.S. holidays (defined in the timeperiod above).
|
||||||
|
define timeperiod{
|
||||||
|
timeperiod_name 24x7_sans_holidays
|
||||||
|
alias 24x7 Sans Holidays
|
||||||
|
|
||||||
|
use us-holidays ; Get holiday exceptions from other timeperiod
|
||||||
|
|
||||||
|
sunday 00:00-24:00
|
||||||
|
monday 00:00-24:00
|
||||||
|
tuesday 00:00-24:00
|
||||||
|
wednesday 00:00-24:00
|
||||||
|
thursday 00:00-24:00
|
||||||
|
friday 00:00-24:00
|
||||||
|
saturday 00:00-24:00
|
||||||
|
}
|
5
roles/nagios_server/files/nagios/contactgroups/bodhi.cfg
Normal file
5
roles/nagios_server/files/nagios/contactgroups/bodhi.cfg
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
define contactgroup {
|
||||||
|
contactgroup_name bodhi
|
||||||
|
alias Bodhi Notifications
|
||||||
|
members bowlofeggs
|
||||||
|
}
|
|
@ -0,0 +1,5 @@
|
||||||
|
#define contactgroup{
|
||||||
|
# contactgroup_name build-sysadmin-email
|
||||||
|
# alias Build Sysadmin Email Contacts
|
||||||
|
# members kevin,aditya
|
||||||
|
# }
|
|
@ -0,0 +1,5 @@
|
||||||
|
define contactgroup{
|
||||||
|
contactgroup_name fedora-sysadmin-email
|
||||||
|
alias Fedora Sysadmin Email Contacts
|
||||||
|
members admin,kevin,puiterwijkp,smooge,ausil,jcollie,nb,rigeld2,codeblock,hvivani
|
||||||
|
}
|
|
@ -0,0 +1,5 @@
|
||||||
|
define contactgroup{
|
||||||
|
contactgroup_name fedora-sysadmin-ircbot
|
||||||
|
alias Fedora Sysadmin irc Contacts
|
||||||
|
members ircbot,fedmsg
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
define contactgroup{
|
||||||
|
contactgroup_name fedora-sysadmin-pager
|
||||||
|
alias Fedora Sysadmin Pager Contacts
|
||||||
|
members smoogep,kevinp,puiterwijkp
|
||||||
|
}
|
||||||
|
define contactgroup{
|
||||||
|
contactgroup_name fedora-sysadmin-emergency
|
||||||
|
alias Fedora Sysadmin Pager Contacts
|
||||||
|
members smooge-emergency,kevin-emergency,puiterwijk-emergency
|
||||||
|
}
|
5
roles/nagios_server/files/nagios/contactgroups/null.cfg
Normal file
5
roles/nagios_server/files/nagios/contactgroups/null.cfg
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
define contactgroup{
|
||||||
|
contactgroup_name null
|
||||||
|
alias null
|
||||||
|
members null
|
||||||
|
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue