put in the first run at new nagios configs

2017-01-05 00:55:16 +00:00 · 2017-01-05 00:55:16 +00:00 · 8cf72ff116
commit 8cf72ff116
parent a1957d29d4
310 changed files with 13255 additions and 26 deletions
--- a/inventory/inventory
+++ b/inventory/inventory
@ -1264,6 +1264,12 @@ docker-candidate-registry01.phx2.fedoraproject.org
 docker-registry01.stg.phx2.fedoraproject.org
 docker-candidate-registry01.stg.phx2.fedoraproject.org

+[webservers:children]
+proxies
+ipsilon
+ipa
+fas
+
 #
 # Hosts in this group have zombie processes for various reasons
 # and we want to not alert on those, so to the client nrpe.conf uses
@ -1276,3 +1282,4 @@ pkgs02.phx2.fedoraproject.org
 fed-cloud09.cloud.fedoraproject.org
 # Ansible from time to time in large runs has zombie threads
 batcave01.phx2.fedoraproject.org
+
--- a/roles/nagios/server/files/nagios-external/contacts/jstanley.cfg
+++ b/roles/nagios/server/files/nagios-external/contacts/jstanley.cfg
@ -35,4 +35,3 @@ define contact{
 	email				9178159801@vtext.com
 	pager				9178159801@vtext.com
 }
-
--- a/roles/nagios/server/files/nagios-external/contacts/nb.cfg
+++ b/roles/nagios/server/files/nagios-external/contacts/nb.cfg
@ -10,29 +10,29 @@ define contact{
 	email                           nick@bebout.net
 }

-define contact{
-	contact_name		nb-emergency
-	alias			Nick Bebout
-	service_notification_period     never
-	host_notification_period        never
-	service_notification_options    w,u,c,r
-	host_notification_options       d,u,r
-	service_notification_commands   notify-by-epager
-	host_notification_commands      host-notify-by-epager
-	email				nb5@txt.att.net
-	pager				nb5@txt.att.net
-}
+#define contact{
+#	contact_name		nb-emergency
+#	alias			Nick Bebout
+#	service_notification_period     never
+#	host_notification_period        never
+#	service_notification_options    w,u,c,r
+#	host_notification_options       d,u,r
+#	service_notification_commands   notify-by-epager
+#	host_notification_commands      host-notify-by-epager
+#	email				nb5@txt.att.net
+#	pager				nb5@txt.att.net
+#}

-define contact{
-	contact_name		nbp
-	alias			Nick Bebout
-	service_notification_period     never
-	host_notification_period        never
-	service_notification_options    w,u,c,r
-	host_notification_options       d,u,r
-	service_notification_commands   notify-by-epager
-	host_notification_commands      host-notify-by-epager
-	email				nb5@txt.att.net
-	pager				nb5@txt.att.net
-}
+#define contact{
+#	contact_name		nbp
+#	alias			Nick Bebout
+#	service_notification_period     never
+#	host_notification_period        never
+#	service_notification_options    w,u,c,r
+#	host_notification_options       d,u,r
+#	service_notification_commands   notify-by-epager
+#	host_notification_commands      host-notify-by-epager
+#	email				nb5@txt.att.net
+#	pager				nb5@txt.att.net
+#}

--- a/roles/nagios/server/files/nagios-external/contacts/skvidal.cfg
+++ b/roles/nagios/server/files/nagios-external/contacts/skvidal.cfg
@ -11,7 +11,19 @@
 #}
 #
 #define contact{
-#	contact_name		skvidalp
+#	contact_name		skvidal_xmpp
+#	alias			Seth Vidal
+#	service_notification_period     24x7
+#	host_notification_period        24x7
+#	service_notification_options    w,u,c,r
+#	host_notification_options       d,u,r
+#	service_notification_commands   notify-by-xmpp
+#	host_notification_commands      host-notify-by-xmpp
+#	email                           skvidal@jabber.org
+#}
+#
+#define contact{
+#	contact_name		skvidal-emergency
 #	alias			Seth Vidal
 #	service_notification_period     24x7
 #	host_notification_period        24x7
@ -20,5 +32,17 @@
 #	service_notification_commands   notify-by-epager
 #	host_notification_commands      host-notify-by-epager
 #	email				page-seth-vidal@sethdot.org
+#}
+#
+#define contact{
+#	contact_name		skvidalp
+#	alias			Seth Vidal
+#	service_notification_period     16x7
+#	host_notification_period        16x7
+#	service_notification_options    w,u,c,r
+#	host_notification_options       d,u,r
+#	service_notification_commands   notify-by-epager
+#	host_notification_commands      host-notify-by-epager
+#	email				page-seth-vidal@sethdot.org
 #	pager				page-seth-vidal@sethdot.org
 #}
--- a/roles/nagios_client/README.rst
+++ b/roles/nagios_client/README.rst
@ -0,0 +1,36 @@
+===================================
+ Nagios 4 Configuration for Fedora
+===================================
+
+The Fedora Infrastructure Nagios is built on a set of configurations
+originally written for Nagios 2 and then upgraded over time to Nagios
+3 and then 4.08. With the additional changes made in the 4.2 series of
+Nagios, a more thorough rewrite was needed, because various parts date
+from the pre-puppet era and others came from puppet modules added on top.
+
+To get this rewrite done, we keep as much of the original layout of the
+Fedora ansible nagios module as possible, rewriting it where needed to
+match current Nagios configurations so that it can be maintained.
+
+Role directory layout
+=====================
+The original layout branched out from::
+
+  roles/nagios/client/
+  roles/nagios/server/
+
+With the usual trees below these. This breaks ansible best practices
+and differs from how most new modules are set up, so the rewrite uses::
+
+  roles/nagios_client/
+  roles/nagios_server/
+
+=====================
+ Nagios Client Files
+=====================
+
+For the most part the Nagios Client files seem to work unchanged when
+moved from the original layout to the new one. Initially, changes only
+need to be made to the playbooks.
+
+
--- a/roles/nagios_client/files/scripts/check_datanommer_timesince.py
+++ b/roles/nagios_client/files/scripts/check_datanommer_timesince.py
@ -0,0 +1,72 @@
+#!/usr/bin/env python
+""" NRPE check for datanommer/fedmsg health.
+Given a category like 'bodhi', 'buildsys', or 'git', return an error if
+datanommer hasn't seen a message of that type in such and such time.
+You can alternatively provide a 'topic' which might look like
+org.fedoraproject.prod.bodhi.update.comment.
+
+Requires:  python-dateutil
+
+Usage:
+
+    $ check_datanommer_timesince CATEGORY WARNING_THRESH CRITICAL_THRESH
+
+:Author: Ralph Bean <rbean@redhat.com>
+
+"""
+
+import dateutil.relativedelta
+import subprocess
+import sys
+import json
+
+
+def query_timesince(identifier):
+    # If it has a '.', then assume it is a topic.
+    if '.' in identifier:
+        cmd = 'datanommer-latest --topic %s --timesince' % identifier
+    else:
+        cmd = 'datanommer-latest --category %s --timesince' % identifier
+    sys.stderr.write("Running %r\n" % cmd)
+    process = subprocess.Popen(cmd.split(), shell=False,
+                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = process.communicate()
+    prefix, stdout = stdout.split("INFO] ", 1)
+    data = json.loads(stdout)
+    return float(data[0])
+
+
+def main():
+    identifier, warning_threshold, critical_threshold = sys.argv[-3:]
+    timesince = query_timesince(identifier)
+    warning_threshold = int(warning_threshold)
+    critical_threshold = int(critical_threshold)
+
+    time_strings = []
+    rd = dateutil.relativedelta.relativedelta(seconds=timesince)
+    for denomination in ['years', 'months', 'days', 'hours', 'minutes', 'seconds']:
+        value = getattr(rd, denomination, 0)
+        if value:
+            time_strings.append("%d %s" % (value, denomination))
+
+    string = ", ".join(time_strings)
+    reason = "datanommer has not seen a %r message in %s" % (identifier, string)
+
+    if timesince > critical_threshold:
+        print "CRIT: ", reason
+        sys.exit(2)
+
+    if timesince > warning_threshold:
+        print "WARN: ", reason
+        sys.exit(1)
+
+    print "OK: ", reason
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except Exception as e:
+        print "UNKNOWN: ", str(e)
+        sys.exit(3)
--- a/roles/nagios_client/files/scripts/check_fcomm_queue
+++ b/roles/nagios_client/files/scripts/check_fcomm_queue
@ -0,0 +1,23 @@
+#!/usr/bin/env python
+import sys
+
+try:
+    import retask.queue
+
+    queue = retask.queue.Queue('fedora-packages')
+    queue.connect()
+
+    items = queue.length
+    if items > 500:
+        print "CRITICAL:  %i tasks in fcomm queue" % items
+        sys.exit(2)
+    elif items > 250:
+        print "WARNING:  %i tasks in fcomm queue" % items
+        sys.exit(1)
+    else:
+        print "OK:  %i tasks in fcomm queue" % items
+        sys.exit(0)
+
+except Exception as e:
+    print "UNKNOWN:", str(e)
+    sys.exit(3)
--- a/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py
+++ b/roles/nagios_client/files/scripts/check_fedmsg_consumer_backlog.py
@ -0,0 +1,62 @@
+#!/usr/bin/env python
+
+import json
+import os
+import socket
+import sys
+import zmq
+
+try:
+    service = sys.argv[1]
+    check_consumer = sys.argv[2]
+    backlog_warning = int(sys.argv[3])
+    backlog_critical = int(sys.argv[4])
+    fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+
+    if not os.path.exists(fname):
+        print "UNKNOWN - %s does not exist" % fname
+        sys.exit(3)
+
+    if not os.access(fname, os.W_OK):
+        print "UNKNOWN - cannot write to %s" % fname
+        sys.exit(3)
+
+    connect_to = "ipc:///%s" % fname
+    ctx = zmq.Context()
+    s = ctx.socket(zmq.SUB)
+    s.connect(connect_to)
+    s.setsockopt(zmq.SUBSCRIBE, '')
+
+    poller = zmq.Poller()
+    poller.register(s, zmq.POLLIN)
+
+    timeout = 20000
+
+    events = dict(poller.poll(timeout))
+    if s in events and events[s] == zmq.POLLIN:
+        msg = s.recv()
+        msg = json.loads(msg)
+    else:
+       print 'UNKNOWN - ZMQ timeout.  No message received in %i ms' % timeout
+       sys.exit(3)
+
+    for consumer in msg['consumers']:
+        if consumer['name'] == check_consumer:
+            if consumer['backlog'] is None:
+                print 'ERROR: fedmsg consumer %s is not initialized' % consumer['name']
+                sys.exit(3)
+            elif consumer['backlog'] > backlog_critical:
+                print 'CRITICAL: fedmsg consumer %s backlog value is %i' % (consumer['name'],consumer['backlog'])
+                sys.exit(2)
+            elif consumer['backlog'] > backlog_warning:
+                print 'WARNING: fedmsg consumer %s backlog value is %i' % (consumer['name'],consumer['backlog'])
+                sys.exit(1)
+            else:
+                print 'OK: fedmsg consumer %s backlog value is %i' % (consumer['name'],consumer['backlog'])
+                sys.exit(0)
+
+    print "UNKNOWN: fedmsg consumer %s not found" % check_consumer
+    sys.exit(3)
+except Exception as err:
+    print "UNKNOWN:", str(err)
+    sys.exit(3)
--- a/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py
+++ b/roles/nagios_client/files/scripts/check_fedmsg_consumer_exceptions.py
@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+import json
+import os
+import socket
+import sys
+import zmq
+
+try:
+    service = sys.argv[1]
+    check_consumer = sys.argv[2]
+    exceptions_warning = int(sys.argv[3])
+    exceptions_critical = int(sys.argv[4])
+    fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+
+    if not os.path.exists(fname):
+        print "UNKNOWN - %s does not exist" % fname
+        sys.exit(3)
+
+    if not os.access(fname, os.W_OK):
+        print "UNKNOWN - cannot write to %s" % fname
+        sys.exit(3)
+
+    connect_to = "ipc:///%s" % fname
+    ctx = zmq.Context()
+    s = ctx.socket(zmq.SUB)
+    s.connect(connect_to)
+    s.setsockopt(zmq.SUBSCRIBE, '')
+    poller = zmq.Poller()
+    poller.register(s, zmq.POLLIN)
+
+    timeout = 20000
+
+    events = dict(poller.poll(timeout))
+    if s in events and events[s] == zmq.POLLIN:
+        msg = s.recv()
+        msg = json.loads(msg)
+    else:
+       print 'UNKNOWN - ZMQ timeout.  No message received in %i ms' % timeout
+       sys.exit(3)
+
+    for consumer in msg['consumers']:
+        if consumer['name'] == check_consumer:
+            if consumer['exceptions'] > exceptions_critical:
+                print 'CRITICAL: fedmsg consumer %s exceptions value is %i' % (consumer['name'],consumer['exceptions'])
+                sys.exit(2)
+            elif consumer['exceptions'] > exceptions_warning:
+                print 'WARNING: fedmsg consumer %s exceptions value is %i' % (consumer['name'],consumer['exceptions'])
+                sys.exit(1)
+            else:
+                print 'OK: fedmsg consumer %s exceptions value is %i' % (consumer['name'],consumer['exceptions'])
+                sys.exit(0)
+
+    print "UNKNOWN: fedmsg consumers %s not found" % check_consumer
+    sys.exit(3)
+except Exception as err:
+    print "UNKNOWN:", str(err)
+    sys.exit(3)
--- a/roles/nagios_client/files/scripts/check_fedmsg_producer_last_ran.py
+++ b/roles/nagios_client/files/scripts/check_fedmsg_producer_last_ran.py
@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import arrow
+import json
+import os
+import socket
+import sys
+import time
+import zmq
+
+try:
+    service = sys.argv[1]
+    check_producer = sys.argv[2]
+    elapsed_warning = int(sys.argv[3])
+    elapsed_critical = int(sys.argv[4])
+    fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+
+    if not os.path.exists(fname):
+        print "UNKNOWN - %s does not exist" % fname
+        sys.exit(3)
+
+    if not os.access(fname, os.W_OK):
+        print "UNKNOWN - cannot write to %s" % fname
+        sys.exit(3)
+
+    connect_to = "ipc:///%s" % fname
+    ctx = zmq.Context()
+    s = ctx.socket(zmq.SUB)
+    s.connect(connect_to)
+    s.setsockopt(zmq.SUBSCRIBE, '')
+
+    poller = zmq.Poller()
+    poller.register(s, zmq.POLLIN)
+
+    timeout = 20000
+
+    events = dict(poller.poll(timeout))
+    if s in events and events[s] == zmq.POLLIN:
+        msg = s.recv()
+        msg = json.loads(msg)
+    else:
+       print 'UNKNOWN - ZMQ timeout.  No message received in %i ms' % timeout
+       sys.exit(3)
+
+    now = time.time()
+
+    for prod in msg['producers']:
+        if prod['name'] != check_producer:
+            continue
+        diff = now - prod['last_ran']
+        then = arrow.get(prod['last_ran']).humanize()
+        if diff > elapsed_critical:
+            print "CRITICAL: %s last ran %s (%i seconds ago)" % (
+                check_producer, then, diff)
+            sys.exit(2)
+        elif diff > elapsed_warning:
+            print "WARNING: %s last ran %s (%i seconds ago)" % (
+                check_producer, then, diff)
+            sys.exit(1)
+        else:
+            print "OK: %s last ran %s (%i seconds ago)" % (
+                check_producer, then, diff)
+            sys.exit(0)
+
+    print "UNKNOWN: fedmsg producer %s not found" % check_producer
+    sys.exit(3)
+except Exception as err:
+    print "UNKNOWN:", str(err)
+    sys.exit(3)
--- a/roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py
+++ b/roles/nagios_client/files/scripts/check_fedmsg_producers_consumers.py
@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+import json
+import os
+import socket
+import sys
+import zmq
+
+try:
+    service = sys.argv[1]
+    check_list = frozenset(sys.argv[2:])
+    fname = '/var/run/fedmsg/monitoring-%s.socket' % service
+
+    if not check_list:
+        print "UNKNOWN - empty list of fedmsg consumers and producers to check"
+        sys.exit(3)
+
+    if not os.path.exists(fname):
+        print "UNKNOWN - %s does not exist" % fname
+        sys.exit(3)
+
+    if not os.access(fname, os.W_OK):
+        print "UNKNOWN - cannot write to %s" % fname
+        sys.exit(3)
+
+    connect_to = "ipc:///%s" % fname
+    ctx = zmq.Context()
+    s = ctx.socket(zmq.SUB)
+    s.connect(connect_to)
+    s.setsockopt(zmq.SUBSCRIBE, '')
+    poller = zmq.Poller()
+    poller.register(s, zmq.POLLIN)
+
+    timeout = 20000
+
+    events = dict(poller.poll(timeout))
+    if s in events and events[s] == zmq.POLLIN:
+        msg = s.recv()
+        msg = json.loads(msg)
+    else:
+       print 'UNKNOWN - ZMQ timeout.  No message received in %i ms' % timeout
+       sys.exit(3)
+
+    for consumer in msg['consumers']:
+        if consumer['name'] in check_list and not consumer['initialized']:
+            print 'ERROR: fedmsg consumer %s is not initialized' % consumer['name']
+            sys.exit(2)
+
+    for producer in msg['producers']:
+        if producer['name'] in check_list and not producer['initialized']:
+            print 'ERROR: fedmsg producer %s is not initialized' % producer['name']
+            sys.exit(2)
+
+    for item in check_list:
+        if item not in [p['name'] for p in msg['producers'] + msg['consumers']]:
+            print 'ERROR: %s not found among installed plugins' % item
+            sys.exit(2)
+
+    print "OK: fedmsg consumer(s) and producer(s) initialized"
+    sys.exit(0)
+
+except Exception as err:
+    print "UNKNOWN:", str(err)
+    sys.exit(3)
--- a/roles/nagios_client/files/scripts/check_haproxy_conns.py
+++ b/roles/nagios_client/files/scripts/check_haproxy_conns.py
@ -0,0 +1,76 @@
+#!/usr/bin/env python
+""" Nagios check for haproxy over-subscription.
+
+fedmsg-gateway is the primary concern as it can eat up a ton of simultaneous
+connections.
+
+:Author:  Ralph Bean <rbean@redhat.com>
+"""
+
+import socket
+import sys
+
+
+def _numeric(value):
+    """ Type casting utility """
+    try:
+        return int(value)
+    except ValueError:
+        try:
+            return float(value)
+        except ValueError:
+            return value
+
+
+def query(sockname="/var/run/haproxy-stat"):
+    """ Read stats from the haproxy socket and return a dict """
+    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+    s.connect("/var/run/haproxy-stat")
+    s.send('show info\n')
+    try:
+        response = s.recv(1024).strip()
+        lines = response.split('\n')
+        data = dict([map(str.strip, line.split(':')) for line in lines])
+        data = dict([(k, _numeric(v)) for k, v in data.items()])
+        return data
+    except Exception, e:
+        print str(e)
+    finally:
+        s.close()
+
+    return None
+
+
+def nagios_check(data):
+    """ Print warnings and return nagios exit codes. """
+
+    current = data['CurrConns']
+    maxconn = data['Maxconn']
+    percent = 100 * float(current) / float(maxconn)
+    details = "%.2f%% subscribed.  %i current of %i maxconn." % (
+        percent, current, maxconn,
+    )
+
+    if percent < 50:
+        print "HAPROXY SUBS OK: " + details
+        return 0
+
+    if percent < 75:
+        print "HAPROXY SUBS WARN: " + details
+        return 1
+
+    if percent <= 100:
+        print "HAPROXY SUBS CRIT: " + details
+        return 2
+
+    print "HAPROXY SUBS UNKNOWN: " + details
+    return 3
+
+
+if __name__ == '__main__':
+    try:
+        data = query(sockname="/var/run/haproxy-stat")
+    except Exception as e:
+        print "HAPROXY SUBS UNKNOWN: " + str(e)
+        sys.exit(3)
+    sys.exit(nagios_check(data))
--- a/roles/nagios_client/files/scripts/check_haproxy_mirrorlist.py
+++ b/roles/nagios_client/files/scripts/check_haproxy_mirrorlist.py
@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+import socket
+import sys
+
+
+try:
+
+    unixsocket="/var/run/haproxy-stat"
+    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+    s.connect(unixsocket)
+    s.send('show stat\n')
+
+    try:
+
+        output = s.recv(16384).strip().split('\n')
+	fields = output.pop(0).split(',')
+	fields[0]=fields[0].replace('# ','')
+	proxies = list()
+	for line in output:
+	  proxies.append(dict(zip(fields,line.split(','))))
+
+    except Exception, e:
+      print str(e)
+    finally:
+      s.close()
+
+except Exception as e:
+  print "MIRRORLIST STATE UNKNOWN: " + str(e)
+  sys.exit(3)
+
+total=0
+downcount=0
+downlist=""
+for proxy in proxies:
+ if proxy['svname'] == "FRONTEND" or proxy['svname'] == "BACKEND":
+   continue
+ if proxy['pxname'] == "mirror-lists":
+   total+=1
+   if proxy['status'] == "DOWN":
+     downlist+=proxy["svname"]+" "
+     downcount+=1
+
+unavailability = 100 * float(downcount) / float(total)
+
+if unavailability == 0:
+  print "MIRRORLIST STATE OK: " + downlist
+  sys.exit(0)
+
+if unavailability < 50:
+  print "MIRRORLIST STATE WARN: " + downlist
+  sys.exit(1)
+
+if unavailability >= 50:
+  print "MIRRORLIST STATE CRIT: " + downlist
+  sys.exit(2)
+
+print "MIRRORLIST STATE UNKNOWN: " + downlist
+sys.exit(3)
--- a/roles/nagios_client/files/scripts/check_ipa_replication
+++ b/roles/nagios_client/files/scripts/check_ipa_replication
@ -0,0 +1,74 @@
+#!/usr/bin/python 
+# Source: https://github.com/opinkerfi/nagios-plugins/blob/master/check_ipa/check_ipa_replication
+# Copyright 2013, Tomas Edwardsson 
+# Copyright 2016, Patrick Uiterwijk
+#
+# This script is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This script is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import ldap
+from pynag.Plugins import PluginHelper, critical, warning, ok
+
+plugin = PluginHelper()
+
+plugin.parser.add_option('-u', help="ldap uri", dest="uri")
+plugin.parser.add_option('-D', help="bind DN", dest="binddn")
+plugin.parser.add_option('-w', help="bind password", dest="bindpw")
+plugin.parse_arguments()
+
+if not plugin.options.uri:
+    plugin.parser.error('-u (uri) argument is required')
+
+try:
+    l = ldap.initialize(plugin.options.uri)
+
+    if plugin.options.binddn:
+        l.bind_s(plugin.options.binddn, plugin.options.bindpw)
+
+    replication = l.search_s('cn=config', 
+        ldap.SCOPE_SUBTREE, 
+        '(objectclass=nsds5replicationagreement)',
+        ['nsDS5ReplicaHost', 'nsds5replicaLastUpdateStatus'])
+except Exception, e:
+    plugin.status(critical)
+    plugin.add_summary("Unable to initialize ldap connection: %s" % (e))
+    plugin.exit()
+
+
+# Loop through replication agreements
+for rhost in replication:
+    plugin.add_summary("Replica %s Status: %s" % (rhost[1]['nsDS5ReplicaHost'][0], rhost[1]['nsds5replicaLastUpdateStatus'][0]))
+
+    status = rhost[1]['nsds5replicaLastUpdateStatus'][0]
+    code = status[:2]
+    if status.startswith('Error ('):
+        # IPA >=4.4.0
+        code = status[status.find('(')+1:status.find(')')]
+    else:
+        # IPA <4.4.0
+        code = status[:status.find(' ')]
+
+    if code == '0':
+        plugin.status(ok)
+    elif code == '1':
+        # Busy Replica is not an error, its "unknown" (but its "ok" for now)
+        plugin.status(ok)
+    else:
+        plugin.status(critical)
+
+if not len(replication):
+    plugin.add_summary("Warning: No replicas found")
+    plugin.status(warning)
+
+plugin.exit()
+
--- a/roles/nagios_client/files/scripts/check_lock
+++ b/roles/nagios_client/files/scripts/check_lock
@ -0,0 +1,17 @@
+#!/usr/bin/python
+
+import fcntl
+import sys
+
+try:
+    f = open('/mnt/koji/.nagios_test', 'r')
+    f.close()
+    f = open('/mnt/koji/.nagios_test', 'w')
+except IOError:
+    print "Could not create file"
+    sys.exit(2)
+    
+fcntl.flock(f, fcntl.LOCK_EX)
+f.close()
+print "File Locked Successfully"
+sys.exit(0)
--- a/roles/nagios_client/files/scripts/check_lock_file_age
+++ b/roles/nagios_client/files/scripts/check_lock_file_age
@ -0,0 +1,123 @@
+#! /usr/bin/perl -w
+
+# check_lock_file_age.pl Copyright (C) 2010 Ricky Elrod <codeblock@fedoraproject.org>
+#
+# Fork of check_file_age.pl
+#
+# Checks a lock file's modification time to make sure the lock is
+# sufficiently recent.  A lock file that does not exist is reported as OK.
+#
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty
+# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# you should have received a copy of the GNU General Public License
+# along with this program (or with Nagios);  if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA
+
+use strict;
+use English;
+use Getopt::Long;
+use File::stat;
+use vars qw($PROGNAME);
+use lib "/usr/lib64/nagios/plugins";
+use utils qw (%ERRORS &print_revision &support);
+
+sub print_help ();
+sub print_usage ();
+
+my ($opt_c, $opt_f, $opt_w, $opt_h, $opt_V);
+my ($result, $message, $age, $size, $st);
+
+$PROGNAME="check_lock_file_age";
+
+$opt_w = 1;
+$opt_c = 5;
+$opt_f = "";
+
+Getopt::Long::Configure('bundling');
+# Every option that carries a value needs its "=s"/"=f" spec on BOTH the
+# short and the long spelling.  A bare "file" is parsed as a boolean flag:
+# "--file PATH" would set $opt_f to 1 and leave PATH behind in @ARGV, so the
+# script would look for a file named "1" and report it as removed (OK).
+GetOptions(
+	"V"   => \$opt_V, "version"	=> \$opt_V,
+	"h"   => \$opt_h, "help"	=> \$opt_h,
+	"f=s" => \$opt_f, "file=s"	=> \$opt_f,
+	"w=f" => \$opt_w, "warning-age=f" => \$opt_w,
+	"c=f" => \$opt_c, "critical-age=f" => \$opt_c);
+
+if ($opt_V) {
+	print_revision($PROGNAME, '1.4.14');
+	exit $ERRORS{'OK'};
+}
+
+if ($opt_h) {
+	print_help();
+	exit $ERRORS{'OK'};
+}
+
+if (($opt_c and $opt_w) and ($opt_c < $opt_w)) {
+        print "Warning time must be less than Critical time.\n";
+        exit $ERRORS{'UNKNOWN'};
+}
+
+$opt_f = shift unless ($opt_f);
+
+if (! $opt_f) {
+	print "LOCK_FILE_AGE UNKNOWN: No file specified\n";
+	exit $ERRORS{'UNKNOWN'};
+}
+
+# Check that file exists (can be directory or link)
+unless (-e $opt_f) {
+	print "LOCK_FILE_AGE OK: File not found (Lock file removed) - $opt_f\n";
+	exit $ERRORS{'OK'};
+}
+
+$st = File::stat::stat($opt_f);
+$age = time - $st->mtime;
+
+$result = 'OK';
+
+# Convert minutes to seconds
+if($opt_c) { $opt_c *= 60; }
+if($opt_w) { $opt_w *= 60; }
+
+if ($opt_c and $age > $opt_c) {
+	$result = 'CRITICAL';
+}
+elsif ($opt_w and $age > $opt_w) {
+	$result = 'WARNING';
+}
+
+# If the age is higher than 2 minutes, convert seconds -> minutes
+# If it's higher than a day, use days.
+# Just a nicety, to make people not have to do math ;)
+if($age > 86400) { $age = int(($age/86400))." days"; }
+elsif($age > 120) { $age = int(($age/60))." minutes"; }
+else { $age = "$age seconds"; }
+
+print "LOCK_FILE_AGE $result: $opt_f is $age old.\n";
+exit $ERRORS{$result};
+
+# Print the command-line synopsis.  The -w/-c thresholds are given in
+# minutes: the script multiplies them by 60 before comparing them with the
+# file's age in seconds.
+sub print_usage () {
+	print "Usage:\n";
+	print "  $PROGNAME [-w <mins>] [-c <mins>] -f <file>\n";
+	print "  $PROGNAME [-h | --help]\n";
+	print "  $PROGNAME [-V | --version]\n";
+}
+
+sub print_help () {
+	print_revision($PROGNAME, '1.4.14');
+	print "Copyright (c) 2010 Ricky Elrod\n\n";
+	print_usage();
+	print "\n";
+	print "  <mins>  File must be no more than this many minutes old (default: warn 1m, crit 5m)\n";
+	print "\n";
+	support();
+}
--- a/roles/nagios_client/files/scripts/check_memcache_connect
+++ b/roles/nagios_client/files/scripts/check_memcache_connect
@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# 2014-11-19
+# Author: Ralph Bean <rbean@redhat.com>
+
+# exit codes
+ok=0
+warn=1
+crit=2
+unkn=3
+
+# Right now we just check to see if we can even run this command without
+# hanging and timing out.  In the future, we could parse stdout for more
+# fine-grained information.
+echo stats | nc 127.0.0.1 11211 > /dev/null
+status=$?
+
+if [ $status -ne 0 ]; then
+    echo "CRIT:  stats command got status code $status"
+    exit $crit
+else
+    echo "OK:  stats command got status code $status"
+    exit $ok
+fi
--- a/roles/nagios_client/files/scripts/check_osbs_api.py
+++ b/roles/nagios_client/files/scripts/check_osbs_api.py
@ -0,0 +1,14 @@
+#!/usr/bin/python
+
+import requests
+import sys
+
+r =  requests.get("https://localhost:8443/", verify=False)
+
+if 'paths' in r.json().keys():
+    print "OK: OSBS API endpoint is responding with path data"
+    sys.exit(0)
+else:
+    print "CRITICAL: OSBS API not responding properly"
+    sys.exit(2)
+
--- a/roles/nagios_client/files/scripts/check_osbs_builds.py
+++ b/roles/nagios_client/files/scripts/check_osbs_builds.py
@ -0,0 +1,23 @@
+#!/usr/bin/python
+
+import subprocess
+import sys
+
+sp = subprocess.Popen(
+    ["osbs", "list-builds"],
+    stdout=subprocess.PIPE,
+    stderr=subprocess.PIPE,
+    stdin=subprocess.PIPE
+)
+sp_out, sp_err = sp.communicate()
+sp_err = sp_err.split('\n')
+
+if 'not attached to terminal' in sp_err[0]:
+    sp_err = sp_err[1:]
+
+if sp_err[0].split()[0] == 'BUILD':
+    print "OK: OSBS is responsive to 'osbs list-builds'"
+    sys.exit(0)
+else:
+    print "CRITICAL: OSBS UNRESPONSIVE"
+    sys.exit(2)
--- a/roles/nagios_client/files/scripts/check_postfix_queue
+++ b/roles/nagios_client/files/scripts/check_postfix_queue
@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# 19-07-2010
+# Author: Cherwin Nooitmeer <cherwin@gmail.com>
+#
+
+# exit codes
+e_ok=0
+e_warning=1
+e_critical=2
+e_unknown=3
+
+# regular expression that matches queue IDs (e.g. D71EF7AC80F8)
+queue_id='^[A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9]'
+
+usage="Invalid command line usage"
+
+if [ -z "$1" ]; then
+    echo "$usage"
+    exit $e_unknown
+fi
+
+# -w <count>: warning threshold, -c <count>: critical threshold
+while getopts ":w:c:" options
+do
+    case $options in
+        w ) warning=$OPTARG ;;
+        c ) critical=$OPTARG ;;
+        * ) echo "$usage"
+            exit $e_unknown ;;
+    esac
+done
+
+# Both thresholds are required and must be non-negative integers; otherwise
+# the numeric tests below fail and the exit status is left undefined.
+case "$warning" in
+    ''|*[!0-9]*) echo "$usage"; exit $e_unknown ;;
+esac
+case "$critical" in
+    ''|*[!0-9]*) echo "$usage"; exit $e_unknown ;;
+esac
+
+# determine queue size
+qsize=$(mailq | egrep -c "$queue_id")
+if [ -z "$qsize" ]
+then
+    exit $e_unknown
+fi
+
+if [ "$qsize" -ge "$critical" ]; then
+    retval=$e_critical
+elif [ "$qsize" -ge "$warning" ]; then
+    retval=$e_warning
+else
+    retval=$e_ok
+fi
+
+echo "$qsize mail(s) in queue | mail_queue=$qsize"
+exit $retval
--- a/roles/nagios_client/files/scripts/check_rabbitmq_size
+++ b/roles/nagios_client/files/scripts/check_rabbitmq_size
@ -0,0 +1,26 @@
+#!/bin/python
+import sys
+import requests
+
+url = 'http://localhost:15672/api/queues/%%2f/%s' % (sys.argv[1])
+
+r = requests.get(url, auth=('guest', 'guest')).json()
+consumers = r['consumers']
+messages = r['messages']
+
+msg = 'Messages in queue: %i (%i consumers)' % (messages, consumers)
+
+if consumers < 1:
+    print 'CRITICAL: No consumers: %s' % msg
+    sys.exit(2)
+
+if messages > sys.argv[2]:
+    print 'CRITICAL: %s' % msg
+    sys.exit(2)
+
+if messages > sys.argv[3]:
+    print 'WARNING: %s' % msg
+    sys.exit(1)
+
+print 'OK: %s' % msg
+sys.exit(0)
--- a/roles/nagios_client/files/scripts/check_raid.py
+++ b/roles/nagios_client/files/scripts/check_raid.py
@ -0,0 +1,45 @@
+#!/usr/bin/env python
+#
+# very simple python script to parse out /proc/mdstat
+# and give results for nagios to monitor
+#
+
+import sys
+import string
+
+devices = []
+
+try:
+    mdstat = string.split(open('/proc/mdstat').read(), '\n')
+except IOError:
+    # seems we have no software raid on this machines
+    sys.exit(0)
+
+error = ""
+i = 0
+for line in mdstat:
+    if line[0:2] == 'md':
+        device = string.split(line)[0]
+        devices.append(device)
+        status = string.split(mdstat[i+1])[3]
+        if string.count(status, "_"):
+            # see if we can figure out what's going on
+            err = string.split(mdstat[i+2])
+            msg = "device=%s status=%s" % (device, status)
+            if len(err) > 0:
+                msg = msg + " rebuild=%s" % err[0]
+
+            if not error:
+                error = msg
+            else:
+                error = error + ", " + msg
+    i = i + 1
+
+if not error:
+    print "DEVICES %s OK" % " ".join(devices)
+    sys.exit(0)
+
+else:
+    print error
+    sys.exit(2)
+
--- a/roles/nagios_client/files/scripts/check_readonly_fs
+++ b/roles/nagios_client/files/scripts/check_readonly_fs
@ -0,0 +1,84 @@
+#!/bin/bash
+
+# check_readonlyfs: Check for readonly filesystems
+# Copyright (C) 2010 Davide Madrisan <davide.madrisan@gmail.com>
+#
+# Walks /proc/mounts and reports every mount point whose option list contains
+# "ro".  Prints "OK" and exits with $STATE_OK when there is none; otherwise
+# prints the offending mount points and exits with $exitstatus, which is set
+# to $STATE_WARNING below.
+
+PROGNAME=`/bin/basename $0`
+# Directory part of $0: strips the last path component.
+PROGPATH=`echo $0 | sed -e 's,[\\/][^\\/][^\\/]*$,,'`
+REVISION=`echo '$Revision: 1 $' | sed -e 's/[^0-9.]//g'`
+
+# utils.sh is sourced from the directory this script lives in.  It is not
+# visible here; presumably it defines the STATE_* codes and the
+# print_revision / support helpers used below -- TODO confirm.
+. $PROGPATH/utils.sh
+
+# Print the accepted command lines.
+print_usage() {
+    echo "Usage: $PROGNAME --no-network-fs"
+    echo "Usage: $PROGNAME --help"
+    echo "Usage: $PROGNAME --version"
+}
+
+# Print revision, usage and a one-line description of the plugin.
+print_help() {
+    print_revision $PROGNAME $REVISION
+    echo ""
+    print_usage
+    echo ""
+    echo "readonly filesystem checker plugin for Nagios"
+    echo ""
+    support
+}
+
+# 1: network filesystems are checked as well; --no-network-fs sets this to 0.
+NETFS=1
+
+# Grab the command line arguments
+
+exitstatus=$STATE_WARNING #default
+
+while test -n "$1"; do
+    case "$1" in
+        --help|-h)
+            print_help
+            exit $STATE_OK
+            ;;
+        --version|-V)
+            print_revision $PROGNAME $REVISION
+            exit $STATE_OK
+            ;;
+        --no-network-fs|-n)
+            NETFS="0"
+            ;;
+        *)
+            echo "Unknown argument: $1"
+            print_usage
+            exit $STATE_UNKNOWN
+            ;;
+    esac
+    shift
+done
+
+[ -r /proc/mounts ] || { echo "cannot read /proc/mounts!"; exit $STATE_UNKNOWN; }
+
+# nerr counts the "ro" options found; rofs_list collects their mount points.
+nerr=0
+IFS_SAVE="$IFS"
+
+rofs_list=""
+# Each /proc/mounts line is split into device, mount point, filesystem type
+# and mount options; whatever follows lands in "ignore".
+while read dev mp fs mopt ignore; do
+   [ "$dev" = none ] && continue
+   # Filesystem types that are never reported.
+   case $fs in binfmt_misc|devpts|iso9660|proc|selinuxfs|rpc_pipefs|sysfs|tmpfs|usbfs)
+      continue ;;
+   esac
+   case $fs in autofs|nfs|nfs4|smbfs)
+      # skip the network filesystems
+      [ "$NETFS" = 0 ] && continue ;;
+   esac
+
+   # Split the comma separated mount options into the positional parameters
+   # ($1, $2, ...), then restore IFS for the rest of the loop.
+   IFS=","; set -- $mopt; IFS="$IFS_SAVE"
+   # Consume the options one by one; the empty $1 reached after the last
+   # option ends the walk.
+   while :; do
+   case "$1" in
+      ro) rofs_list="$rofs_list $mp"; nerr=$(( $nerr + 1 )) ;;
+      "") shift; break ;;
+   esac
+   shift
+   done
+# Process substitution keeps the loop in the current shell, so nerr and
+# rofs_list are still set after "done".
+done < <(LC_ALL=C /bin/cat /proc/mounts 2>/dev/null)
+
+# Nothing read-only: report OK and stop.  Otherwise print the list and fall
+# through to the exit below.
+[ $nerr -eq 0 ] && { echo OK; exit $STATE_OK; } || echo "$rofs_list: read only fs"
+
+exit $exitstatus
--- a/roles/nagios_client/files/scripts/check_supybot_plugin
+++ b/roles/nagios_client/files/scripts/check_supybot_plugin
@ -0,0 +1,108 @@
+#!/usr/bin/env python
+""" check_supybot_plugin -- ensure that a plugin is loaded by supybot.
+
+Run like:
+
+    check_supybot_plugin --target fedmsg
+    check_supybot_plugin --target koji --debug
+
+The check registers on the IRC server under a throwaway nick, sends "list"
+to the bot in a private message and looks for the target in the bot's
+comma separated answer.
+
+Exit codes follow the nagios convention: 0 (OK) when the plugin is listed,
+2 (CRITICAL) when it is not, 3 (UNKNOWN) when no target was given or the
+conversation with the server failed, was cut short or timed out.
+"""
+
+import argparse
+import sys
+import socket
+import string
+import uuid
+
+# Nick of the supybot instance that gets asked for its plugin list.
+BOT_NICK = 'zodbot'
+
+
+def process_args():
+    """ Parse the command line and return the resulting namespace. """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-t', '--target', default=None, dest='target',
+        help="Required.  The plugin we're looking for."
+    )
+    parser.add_argument(
+        '-n', '--nick', default=None, dest='nick',
+        help="NICK to use when connecting to freenode.",
+    )
+    parser.add_argument(
+        '-d', '--debug', default=False, action='store_true',
+        help='Print out debug information.', dest='debug',
+    )
+    parser.add_argument(
+        '-H', '--host', default='irc.freenode.net',
+        help='Host to connect to.', dest='host',
+    )
+    parser.add_argument(
+        '-p', '--port', default=6667, type=int,
+        help='Port to connect to.', dest='port',
+    )
+    parser.add_argument(
+        '-T', '--timeout', default=30.0, type=float,
+        help='Seconds to wait on the server before giving up.',
+        dest='timeout',
+    )
+    return parser.parse_args()
+
+
+def sent_by_bot(prefix):
+    """ Tell whether an IRC message prefix (":nick!user@host") is the bot's. """
+    return prefix.lstrip(':').split('!')[0].lower() == BOT_NICK
+
+
+def leave(sock, status):
+    """ Say goodbye to the server, close the socket and exit with status. """
+    try:
+        sock.send("QUIT\r\n")
+        sock.close()
+    except socket.error:
+        # The verdict has been printed already; a failed goodbye must not
+        # turn it into UNKNOWN.
+        pass
+    sys.exit(status)
+
+
+args = process_args()
+
+# Use a random nick so people can't mess with us
+if not args.nick:
+    args.nick = 'nrpe-' + str(uuid.uuid4()).split('-')[0]
+
+name = "NRPE Bot"
+readbuffer = ""
+
+if not args.target:
+    print "UNKNOWN:  No 'target' specified."
+    sys.exit(3)
+
+args.target = args.target.lower()
+
+if args.debug:
+    print "connecting to %s/%i" % (args.host, args.port)
+
+try:
+    s = socket.socket()
+    # Without a timeout a silent server (or bot) blocks the check forever.
+    s.settimeout(args.timeout)
+    s.connect((args.host, args.port))
+
+    if args.debug:
+        print "as %s/%s (%s)" % (args.nick, args.nick, name)
+
+    s.send("nick %s\r\n" % args.nick)
+    s.send("USER %s %s bla :%s\r\n" % (args.nick, args.host, name))
+
+    while 1:
+        chunk = s.recv(1024)
+        if not chunk:
+            # recv() hands back an empty string once the peer has closed
+            # the connection; looping on it would spin without end.
+            raise IOError("connection closed by %s" % args.host)
+
+        # Keep the trailing partial line in the buffer for the next round.
+        readbuffer = readbuffer + chunk
+        temp = string.split(readbuffer, "\n")
+        readbuffer = temp.pop()
+
+        for line in temp:
+            line = string.rstrip(line)
+
+            if args.debug:
+                print " * ", line
+
+            line = string.split(line)
+
+            # Blank or one-word lines have no command field to inspect.
+            if len(line) < 2:
+                continue
+
+            if line[0] == 'PING':
+                # Some servers wait for this answer before they complete
+                # the registration.
+                s.send("PONG %s\r\n" % line[1])
+                continue
+
+            # The MODE line is taken as the sign that registration is done
+            # and messages can be sent.
+            if line[1] == 'MODE':
+                msg = "privmsg %s :list\r\n" % BOT_NICK
+                if args.debug:
+                    print "sending:"
+                    print " ->", msg
+                s.send(msg)
+
+            if line[1] == 'PRIVMSG':
+                # Anybody can message us; only the bot's answer counts.
+                if not sent_by_bot(line[0]):
+                    if args.debug:
+                        print "ignoring message from", line[0]
+                    continue
+
+                if args.debug:
+                    print "Got our response.."
+
+                # NOTE(review): the first word of the answer is dropped
+                # here ([1:]) -- confirm that is intended.
+                plugins = map(str.lower, ' '.join(line[3:][1:]).split(', '))
+
+                if args.target in plugins:
+                    print "OK"
+                    leave(s, 0)
+                else:
+                    print "CRITICAL: %r not loaded by supybot" % args.target
+                    leave(s, 2)
+except Exception as e:
+    print "UNKNOWN: ", str(e)
+    if args.debug:
+        raise
+    sys.exit(3)
--- a/roles/nagios_client/files/scripts/check_testcloud
+++ b/roles/nagios_client/files/scripts/check_testcloud
@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# check_testcloud: nagios check on the number of running testcloud VMs.
+#
+# Exit codes: 0 OK, 1 WARNING (more than $WARNING VMs), 2 CRITICAL (more than
+# $CRITICAL VMs), 3 UNKNOWN (the testcloud command could not be run).
+
+CRITICAL=20
+WARNING=15
+
+# Capture the listing first, so that a missing or failing testcloud command is
+# reported as UNKNOWN instead of being counted as "0 VMs running".
+if ! INSTANCES=$(testcloud instance list)
+then
+    echo "Testcloud: UNKNOWN could not run 'testcloud instance list'"
+    exit 3
+fi
+
+# One line per instance; count those reported as running (any letter case).
+RUNNING_VMS=$(printf '%s\n' "$INSTANCES" | grep -ci 'running')
+
+
+if [ "$RUNNING_VMS" -gt "$CRITICAL" ]
+then
+    echo "Testcloud: CRITICAL Number of VMs running: $RUNNING_VMS"
+    exit 2
+elif [ "$RUNNING_VMS" -gt "$WARNING" ]
+then
+    echo "Testcloud: WARNING Number of VMs running: $RUNNING_VMS"
+    exit 1
+else
+    echo "Testcloud: OK Number of VMs running: $RUNNING_VMS"
+    exit 0
+fi
--- a/roles/nagios_client/files/selinux/fi-nrpe.mod
+++ b/roles/nagios_client/files/selinux/fi-nrpe.mod
--- a/roles/nagios_client/files/selinux/fi-nrpe.pp
+++ b/roles/nagios_client/files/selinux/fi-nrpe.pp
--- a/roles/nagios_client/files/selinux/fi-nrpe.te
+++ b/roles/nagios_client/files/selinux/fi-nrpe.te
@ -0,0 +1,11 @@
+# Local SELinux policy module; the compiled fi-nrpe.pp is what gets installed.
+module fi-nrpe 1.0;
+
+# Types and permissions the rule below refers to; they must already exist in
+# the loaded policy.
+require {
+    type nagios_system_plugin_t;
+    type nrpe_exec_t;
+    class file getattr;
+}
+
+#============= nagios_system_plugin_t ==============
+# Let processes in the nagios_system_plugin_t domain read the attributes
+# (getattr) of files labeled nrpe_exec_t.
+allow nagios_system_plugin_t nrpe_exec_t:file getattr;
+
--- a/roles/nagios_client/handlers/main.yml
+++ b/roles/nagios_client/handlers/main.yml
@ -0,0 +1,3 @@
+---
+# Notified by the nagios_client tasks that install or change nrpe config files.
+- name: restart nrpe
+  service: name=nrpe state=restarted
--- a/roles/nagios_client/tasks/main.yml
+++ b/roles/nagios_client/tasks/main.yml
@ -0,0 +1,228 @@
+# nagios-client/nrpe
+
+---
+# install pkgs on hosts whose major version is below 22 (yum)
+- name: install nagios client pkgs (yum)
+  yum: name={{ item }} state=present
+  with_items:
+  - nrpe
+  - nagios-plugins
+  - nagios-plugins-disk
+  - nagios-plugins-file_age
+  - nagios-plugins-users
+  - nagios-plugins-procs
+  - nagios-plugins-swap
+  - nagios-plugins-load
+  - nagios-plugins-ping
+  tags:
+  - packages
+  - nagios_client
+  when: ansible_distribution_major_version|int < 22
+
+# same package list on hosts whose major version is 22 or later (dnf)
+- name: install nagios client pkgs (dnf)
+  dnf: name={{ item }} state=present
+  with_items:
+  - nrpe
+  - nagios-plugins
+  - nagios-plugins-disk
+  - nagios-plugins-file_age
+  - nagios-plugins-users
+  - nagios-plugins-procs
+  - nagios-plugins-swap
+  - nagios-plugins-load
+  - nagios-plugins-ping
+  tags:
+  - packages
+  - nagios_client
+  when: ansible_distribution_major_version|int > 21
+
+# Check scripts kept in files/scripts of this role; every host except the
+# noc* ones gets all of them.
+- name: install local nrpe check scripts that are not packaged
+  copy:
+    src: "scripts/{{ item }}"
+    dest: "{{ libdir }}/nagios/plugins/{{ item }}"
+    mode: "0755"
+    owner: nagios
+    group: nagios
+  when: not inventory_hostname.startswith('noc')
+  tags:
+  - nagios_client
+  with_items:
+  - check_haproxy_conns.py
+  - check_haproxy_mirrorlist.py
+  - check_postfix_queue
+  - check_raid.py
+  - check_lock
+  - check_fcomm_queue
+  - check_fedmsg_consumer_backlog.py
+  - check_fedmsg_consumer_exceptions.py
+  - check_fedmsg_producer_last_ran.py
+  - check_fedmsg_producers_consumers.py
+  - check_supybot_plugin
+  - check_rabbitmq_size
+  - check_datanommer_timesince.py
+  - check_memcache_connect
+  - check_readonly_fs
+  - check_lock_file_age
+  - check_testcloud
+  - check_osbs_builds.py
+  - check_osbs_api.py
+  - check_ipa_replication
+
+# create dirs
+# puppet used to make /var/spool/nagios (owned by nagios.nagios) mode 750
+# and /usr/lib/nagios/plugins (owned by root) mode 755 - but we don't know WHY
+# then stuff it with plugins from the plugins dir in the nagios module
+# then we symlinked that to /usr/lib64/nagios/plugins
+# it was a nightmare - don't do that - my ghost will haunt you if you do
+# skvidal 2013-05-21
+
+
+# Three tasks for handling our custom selinux module.
+# They carry the nagios_client tag like the rest of the role, so a run
+# limited to that tag no longer leaves them out.
+- name: ensure a directory exists for our custom selinux module
+  file: dest=/usr/share/nrpe state=directory
+  tags:
+  - nagios_client
+
+- name: copy over our custom selinux module
+  copy: src=selinux/fi-nrpe.pp dest=/usr/share/nrpe/fi-nrpe.pp
+  register: selinux_module
+  tags:
+  - nagios_client
+
+# Only (re)load the module when the copy above actually changed the file.
+# The "changed" key of the registered result is read directly, which does not
+# depend on the "|changed" filter being available.
+- name: install our custom selinux module
+  command: semodule -i /usr/share/nrpe/fi-nrpe.pp
+  when: ansible_distribution_major_version|int == 7 and selinux_module.changed
+  tags:
+  - nagios_client
+
+
+# Main nrpe configuration, rendered from a template; skipped on noc* hosts.
+- name: /etc/nagios/nrpe.cfg
+  template:
+    src: nrpe.cfg.j2
+    dest: /etc/nagios/nrpe.cfg
+  tags:
+  - config
+  - nagios_client
+  notify:
+  - restart nrpe
+  when: not inventory_hostname.startswith('noc')
+
+#
+# The actual items files here end in .j2 (they are templates)
+# So when adding or modifying them change the .j2 version in git.
+#
+- name: install nrpe client configs
+  template:
+    src: "{{ item }}.j2"
+    dest: "/etc/nrpe.d/{{ item }}"
+  tags:
+  - config
+  - nagios_client
+  notify:
+  - restart nrpe
+  with_items:
+  - check_mirrorlist_cache.cfg
+  - check_raid.cfg
+  - check_ipa.cfg
+  - check_readonly_fs.cfg
+  - check_cron.cfg
+  - check_disk.cfg
+  - check_swap.cfg
+  - check_postfix_queue.cfg
+  - check_lock.cfg
+  - check_fedmsg_hub_proc.cfg
+  - check_fedmsg_irc_proc.cfg
+  - check_fedmsg_relay_proc.cfg
+  - check_fedmsg_gateway_proc.cfg
+  - check_fedmsg_masher_proc.cfg
+  - check_redis_proc.cfg
+  - check_autocloud_proc.cfg
+  - check_fedmsg_consumers.cfg
+  - check_supybot_fedmsg_plugin.cfg
+  - check_datanommer_history.cfg
+  - check_memcache.cfg
+  - check_lock_file_age.cfg
+  - check_basset.cfg
+  - check_fmn.cfg
+  - check_osbs.cfg
+  - check_koschei_polling_proc.cfg
+  - check_koschei_resolver_proc.cfg
+  - check_koschei_scheduler_proc.cfg
+  - check_koschei_watcher_proc.cfg
+  - check_testcloud.cfg
+
+#
+# The actual items files here end in .j2 (they are templates)
+# So when adding or modifying them change the .j2 version in git.
+#
+# Only hosts whose name starts with bugyou01 get this check.
+- name: install nrpe bugyou fedmsg hubs check config
+  template:
+    src: check_fedmsg_hub_procs_bugyou.cfg.j2
+    dest: /etc/nrpe.d/check_fedmsg_hub_procs_bugyou.cfg
+  tags:
+  - nagios_client
+  notify:
+  - restart nrpe
+  when: inventory_hostname.startswith('bugyou01')
+
+#
+# The actual items files here end in .j2 (they are templates)
+# So when adding or modifying them change the .j2 version in git.
+#
+# Installed on every host outside the phx2 datacenter.
+- name: install nrpe openvpn check config
+  template:
+    src: check_openvpn_link.cfg.j2
+    dest: /etc/nrpe.d/check_openvpn_link.cfg
+  tags:
+  - nagios_client
+  notify:
+  - restart nrpe
+  when: datacenter != 'phx2'
+#
+# The actual items files here end in .j2 (they are templates)
+# So when adding or modifying them change the .j2 version in git.
+#
+# Only hosts whose name starts with unbound get this check.
+- name: install nrpe unbound check config
+  template:
+    src: check_unbound_proc.cfg.j2
+    dest: /etc/nrpe.d/check_unbound_proc.cfg
+  tags:
+  - nagios_client
+  notify:
+  - restart nrpe
+  when: inventory_hostname.startswith('unbound')
+#
+# The actual items files here end in .j2 (they are templates)
+# So when adding or modifying them change the .j2 version in git.
+#
+# Applies to every host whose name starts with log0.
+- name: install nrpe merged log check script on log01
+  template:
+    src: check_merged_file_age.cfg.j2
+    dest: /etc/nrpe.d/check_merged_file_age.cfg
+  tags:
+  - nagios_client
+  notify:
+  - restart nrpe
+  when: inventory_hostname.startswith('log0')
+#
+# The actual items files here end in .j2 (they are templates)
+# So when adding or modifying them change the .j2 version in git.
+#
+# Only hosts whose name starts with db03 get this check.
+- name: install nrpe check_mysql config for mariadb servers
+  template:
+    src: check_mysql.cfg.j2
+    dest: /etc/nrpe.d/check_mysql.cfg
+  tags:
+  - nagios_client
+  notify:
+  - restart nrpe
+  when: inventory_hostname.startswith('db03')
+
+# The "happroxy" spelling matches the template file names in this role.
+- name: install nrpe checks for proxies
+  template:
+    src: "{{ item }}.j2"
+    dest: "/etc/nrpe.d/{{ item }}"
+  tags:
+  - nagios_client
+  notify:
+  - restart nrpe
+  when: inventory_hostname.startswith('proxy')
+  with_items:
+  - check_happroxy_conns.cfg
+  - check_happroxy_mirrorlist.cfg
+  - check_varnish_proc.cfg
+
+# Make sure nrpe is running now and enabled at boot.  "started" is the
+# documented state name; it means the same as the older "running" spelling
+# and keeps working on Ansible releases that no longer accept the latter.
+- name: nrpe service start
+  service: name=nrpe state=started enabled=true
+  tags:
+  - service
+  - nagios_client
+
+# Read-only probe: stdout is "1" when getent finds the fedmsg group and "0"
+# when it does not.  It also runs in check mode and never reports a change.
+- name: Check if the fedmsg group exists
+  shell: /usr/bin/getent group fedmsg | /usr/bin/wc -l | tr -d ' '
+  register: fedmsg_exists
+  changed_when: false
+  check_mode: no
+  tags:
+  - nagios_client
+
+- name: Add nrpe user to the fedmsg group if it exists
+  user:
+    name: nrpe
+    groups: fedmsg
+    append: yes
+  tags:
+  - nagios_client
+  when: fedmsg_exists.stdout == "1"
--- a/roles/nagios_client/templates/check_autocloud_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_autocloud_proc.cfg.j2
@ -0,0 +1 @@
+# Exactly one root-owned python process running autocloud_job.py is expected.
+# The plugin path goes through libdir, like the other templates of this role.
+command[check_autocloud_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'python' -a 'autocloud_job.py' -u root
--- a/roles/nagios_client/templates/check_basset.cfg.j2
+++ b/roles/nagios_client/templates/check_basset.cfg.j2
@ -0,0 +1,4 @@
+command[check_mongo_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u mongodb -C mongod -c 1:1
+command[check_rabbitmq_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u rabbitmq -C beam.smp -c 1:1
+command[check_worker_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u basset-worker -C basset-worker -c 1:6
+command[check_basset_queue]={{ libdir }}/nagios/plugins/check_rabbitmq_size check_submission 10 20
--- a/roles/nagios_client/templates/check_cron.cfg.j2
+++ b/roles/nagios_client/templates/check_cron.cfg.j2
@ -0,0 +1 @@
+command[check_cron]={{ libdir }}/nagios/plugins/check_procs -c 1:15 -C 'crond' -u root
--- a/roles/nagios_client/templates/check_datanommer_history.cfg.j2
+++ b/roles/nagios_client/templates/check_datanommer_history.cfg.j2
@ -0,0 +1,50 @@
+# Checks on the datanommer history to make sure we're still receiving messages
+# of all types.
+#
+# The following are fedmsg/datanommer checks to be run on busgateway01.
+# They check for the time since the latest message in any particular category.
+# The first number is the seconds elapsed until we should raise a warning.
+# The second number is the seconds elapsed until we should raise an error.
+#   For your reference:
+#       4 hours ->    14400
+#       1 day ->      86400
+#       3 days ->     259200
+#       1 week ->     604800
+#       3 weeks ->    1814400
+#       1 month ->    2628000
+#       3 months ->   7884000
+command[check_datanommer_buildsys]={{libdir}}/nagios/plugins/check_datanommer_timesince.py buildsys 14400 86400
+command[check_datanommer_git]={{libdir}}/nagios/plugins/check_datanommer_timesince.py git 86400 604800
+command[check_datanommer_bodhi]={{libdir}}/nagios/plugins/check_datanommer_timesince.py bodhi 86400 604800
+command[check_datanommer_wiki]={{libdir}}/nagios/plugins/check_datanommer_timesince.py wiki 259200 1814400
+command[check_datanommer_compose]={{libdir}}/nagios/plugins/check_datanommer_timesince.py compose 259200 1814400
+command[check_datanommer_meetbot]={{libdir}}/nagios/plugins/check_datanommer_timesince.py meetbot 604800 2628000
+command[check_datanommer_fas]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fas 1814400 2628000
+command[check_datanommer_pkgdb]={{libdir}}/nagios/plugins/check_datanommer_timesince.py pkgdb 1814400 2628000
+command[check_datanommer_fedoratagger]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedoratagger 2628000 7884000
+command[check_datanommer_planet]={{libdir}}/nagios/plugins/check_datanommer_timesince.py planet 2628000 7884000
+command[check_datanommer_copr]={{libdir}}/nagios/plugins/check_datanommer_timesince.py copr 21600 86400
+command[check_datanommer_trac]={{libdir}}/nagios/plugins/check_datanommer_timesince.py trac 86400 259200
+command[check_datanommer_askbot]={{libdir}}/nagios/plugins/check_datanommer_timesince.py askbot 86400 259200
+command[check_datanommer_fedbadges]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedbadges 86400 259200
+command[check_datanommer_fedocal]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedocal 7884000 23652000
+command[check_datanommer_ansible]={{libdir}}/nagios/plugins/check_datanommer_timesince.py ansible 432000 604800
+command[check_datanommer_summershum]={{libdir}}/nagios/plugins/check_datanommer_timesince.py summershum 604800 1814400
+command[check_datanommer_jenkins]={{libdir}}/nagios/plugins/check_datanommer_timesince.py jenkins 432000 604800
+command[check_datanommer_github]={{libdir}}/nagios/plugins/check_datanommer_timesince.py github 432000 604800
+command[check_datanommer_kerneltest]={{libdir}}/nagios/plugins/check_datanommer_timesince.py kerneltest 604800 1814400
+command[check_datanommer_fmn]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fmn 604800 1814400
+command[check_datanommer_anitya]={{libdir}}/nagios/plugins/check_datanommer_timesince.py anitya 604800 1814400
+command[check_datanommer_fedimg]={{libdir}}/nagios/plugins/check_datanommer_timesince.py fedimg 259200 604800
+command[check_datanommer_hotness]={{libdir}}/nagios/plugins/check_datanommer_timesince.py hotness 604800 1814400
+command[check_datanommer_faf]={{libdir}}/nagios/plugins/check_datanommer_timesince.py faf 86400 259200
+command[check_datanommer_koschei]={{libdir}}/nagios/plugins/check_datanommer_timesince.py koschei 86400 604800
+command[check_datanommer_autocloud]={{libdir}}/nagios/plugins/check_datanommer_timesince.py autocloud 259200 1814400
+command[check_datanommer_twoweekatomic]={{libdir}}/nagios/plugins/check_datanommer_timesince.py org.fedoraproject.prod.releng.atomic.twoweek.complete 1296000 1382400
+
+# This one is retired since it times out all the time.  Too few messages.
+#command[check_datanommer_nuancier]={{libdir}}/nagios/plugins/check_datanommer_timesince.py nuancier 23652000 31536000
+
+# These are not actually finished and deployed yet
+command[check_datanommer_mailman]={{libdir}}/nagios/plugins/check_datanommer_timesince.py mailman 14400 86400
+command[check_datanommer_bugzilla]={{libdir}}/nagios/plugins/check_datanommer_timesince.py bugzilla 86400 259200
--- a/roles/nagios_client/templates/check_disk.cfg.j2
+++ b/roles/nagios_client/templates/check_disk.cfg.j2
@ -0,0 +1,7 @@
+# Free-space checks, one per mount point: -w / -c are the warning / critical
+# thresholds on the percentage of free space.  All plugin paths go through
+# libdir so they follow the directory the plugins are installed in.
+command[check_disk_/]={{ libdir }}/nagios/plugins/check_disk -w 14% -c 10% -p /
+command[check_disk_/boot]={{ libdir }}/nagios/plugins/check_disk -w 15% -c 10% -p /boot
+command[check_disk_/srv/cache/lookaside]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /srv/cache/lookaside
+command[check_disk_/srv]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /srv
+command[check_disk_/srv/buildmaster]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /srv/buildmaster
+command[check_disk_/srv/taskotron]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /srv/taskotron
+command[check_disk_/var/log]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 15% -p /var/log
--- a/roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2
+++ b/roles/nagios_client/templates/check_fedmsg_consumers.cfg.j2
@ -0,0 +1,63 @@
+# Fedmsg checks for consumers and producers
+command[check_fedmsg_cp_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub Nommer MonitoringProducer
+command[check_fedmsg_cp_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer
+command[check_fedmsg_cp_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-gateway GatewayConsumer MonitoringProducer
+command[check_fedmsg_cp_anitya_relay]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer
+command[check_fedmsg_cp_app]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-relay RelayConsumer MonitoringProducer
+command[check_fedmsg_cp_value]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-irc IRCBotConsumer MonitoringProducer
+command[check_fedmsg_cp_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub GenACLsConsumer MonitoringProducer
+command[check_fedmsg_cp_summershum]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub SummerShumConsumer MonitoringProducer
+command[check_fedmsg_cp_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FedoraBadgesConsumer MonitoringProducer
+command[check_fedmsg_cp_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FMNConsumer DigestProducer ConfirmationProducer MonitoringProducer
+command[check_fedmsg_cp_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py moksha-hub BugzillaConsumer MonitoringProducer
+command[check_fedmsg_cp_fedimg_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub FedimgConsumer MonitoringProducer
+command[check_fedmsg_cp_hotness_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub BugzillaTicketFiler MonitoringProducer
+command[check_fedmsg_cp_bodhi_backend01_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub Masher MonitoringProducer
+command[check_fedmsg_cp_bodhi_backend02_hub]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub UpdatesHandler MonitoringProducer
+command[check_fedmsg_cp_autocloud_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub AutoCloudConsumer MonitoringProducer
+command[check_fedmsg_cp_packages_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub CacheInvalidator MonitoringProducer
+command[check_fedmsg_cp_bugyou_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub BugyouConsumer MonitoringProducer
+command[check_fedmsg_cp_pdc_backend]={{libdir}}/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub PDCUpdater MonitoringProducer
+
+command[check_fedmsg_cexceptions_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub Nommer 1 10
+command[check_fedmsg_cexceptions_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10
+command[check_fedmsg_cexceptions_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-gateway GatewayConsumer 1 10
+command[check_fedmsg_cexceptions_anitya_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10
+command[check_fedmsg_cexceptions_app]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10
+command[check_fedmsg_cexceptions_value]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-irc IRCBotConsumer 1 10
+command[check_fedmsg_cexceptions_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub GenACLsConsumer 1 10
+command[check_fedmsg_cexceptions_summershum]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub SummerShumConsumer 1 10
+command[check_fedmsg_cexceptions_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FedoraBadgesConsumer 1 10
+command[check_fedmsg_cexceptions_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FMNConsumer 1 10
+command[check_fedmsg_cexceptions_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py moksha-hub BugzillaConsumer 1 10
+command[check_fedmsg_cexceptions_fedimg_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub FedimgConsumer 1 10
+command[check_fedmsg_cexceptions_hotness_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub BugzillaTicketFiler 1 10
+command[check_fedmsg_cexceptions_bodhi_backend01_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub Masher 1 10
+command[check_fedmsg_cexceptions_bodhi_backend02_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub UpdatesHandler 1 10
+command[check_fedmsg_cexceptions_autocloud_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub AutoCloudConsumer 1 10
+command[check_fedmsg_cexceptions_packages_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub CacheInvalidator 1 10
+command[check_fedmsg_cexceptions_bugyou_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub BugyouConsumer 1 10
+command[check_fedmsg_cexceptions_pdc_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub PDCUpdater 1 10
+
+command[check_fedmsg_cbacklog_busgateway_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub Nommer 500 1000
+command[check_fedmsg_cbacklog_busgateway_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50
+command[check_fedmsg_cbacklog_busgateway_gateway]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-gateway GatewayConsumer 10 50
+command[check_fedmsg_cbacklog_anitya_relay]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50
+command[check_fedmsg_cbacklog_app]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50
+command[check_fedmsg_cbacklog_value]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-irc IRCBotConsumer 10 50
+command[check_fedmsg_cbacklog_pkgs]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub GenACLsConsumer 10 50
+command[check_fedmsg_cbacklog_summershum]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub SummerShumConsumer 100 500
+command[check_fedmsg_cbacklog_badges_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FedoraBadgesConsumer 7000 10000
+command[check_fedmsg_cbacklog_notifs_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FMNConsumer 15000 20000
+command[check_fedmsg_cbacklog_bugzilla2fedmsg]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py moksha-hub BugzillaConsumer 10 100
+command[check_fedmsg_cbacklog_fedimg_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub FedimgConsumer 2000 5000
+command[check_fedmsg_cbacklog_hotness_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub BugzillaTicketFiler 1000 5000
+command[check_fedmsg_cbacklog_bodhi_backend01_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub Masher 500 1000
+command[check_fedmsg_cbacklog_bodhi_backend02_hub]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub UpdatesHandler 500 1000
+command[check_fedmsg_cbacklog_autocloud_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub AutoCloudConsumer 100 500
+command[check_fedmsg_cbacklog_packages_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub CacheInvalidator 20000 30000
+command[check_fedmsg_cbacklog_bugyou_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub BugyouConsumer 5000 10000
+command[check_fedmsg_cbacklog_pdc_backend]={{libdir}}/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub PDCUpdater 10000 20000
+
+command[check_fedmsg_fmn_digest_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub DigestProducer 90 600
+command[check_fedmsg_fmn_confirm_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub ConfirmationProducer 90 600
--- a/roles/nagios_client/templates/check_fedmsg_gateway_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_fedmsg_gateway_proc.cfg.j2
@ -0,0 +1 @@
+command[check_fedmsg_gateway_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-gateway' -u fedmsg
--- a/roles/nagios_client/templates/check_fedmsg_hub_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_fedmsg_hub_proc.cfg.j2
@ -0,0 +1 @@
+command[check_fedmsg_hub_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-hub' -u fedmsg
--- a/roles/nagios_client/templates/check_fedmsg_hub_procs_bugyou.cfg.j2
+++ b/roles/nagios_client/templates/check_fedmsg_hub_procs_bugyou.cfg.j2
@ -0,0 +1 @@
+command[check_fedmsg_hub_procs_bugyou]={{ libdir }}/nagios/plugins/check_procs -c 3:3 -C 'fedmsg-hub' -u fedmsg
--- a/roles/nagios_client/templates/check_fedmsg_irc_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_fedmsg_irc_proc.cfg.j2
@ -0,0 +1 @@
+command[check_fedmsg_irc_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-irc' -u fedmsg
--- a/roles/nagios_client/templates/check_fedmsg_masher_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_fedmsg_masher_proc.cfg.j2
@ -0,0 +1 @@
+command[check_fedmsg_masher_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-hub' -u apache
--- a/roles/nagios_client/templates/check_fedmsg_relay_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_fedmsg_relay_proc.cfg.j2
@ -0,0 +1 @@
+command[check_fedmsg_relay_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-relay' -u fedmsg
--- a/roles/nagios_client/templates/check_fmn.cfg.j2
+++ b/roles/nagios_client/templates/check_fmn.cfg.j2
@ -0,0 +1,2 @@
+command[check_fmn_worker_queue]={{ libdir }}/nagios/plugins/check_rabbitmq_size workers 200 1000
+command[check_fmn_backend_queue]={{ libdir }}/nagios/plugins/check_rabbitmq_size backends 100 200
--- a/roles/nagios_client/templates/check_happroxy_conns.cfg.j2
+++ b/roles/nagios_client/templates/check_happroxy_conns.cfg.j2
@ -0,0 +1 @@
+command[check_haproxy_conns]={{ libdir }}/nagios/plugins/check_haproxy_conns.py
--- a/roles/nagios_client/templates/check_happroxy_mirrorlist.cfg.j2
+++ b/roles/nagios_client/templates/check_happroxy_mirrorlist.cfg.j2
@ -0,0 +1 @@
+command[check_haproxy_mirrorlist]={{ libdir }}/nagios/plugins/check_haproxy_mirrorlist.py
--- a/roles/nagios_client/templates/check_ipa.cfg.j2
+++ b/roles/nagios_client/templates/check_ipa.cfg.j2
@ -0,0 +1 @@
+command[check_ipa_replication]={{ libdir }}/nagios/plugins/check_ipa_replication -u ldaps://localhost/
--- a/roles/nagios_client/templates/check_koschei_polling_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_koschei_polling_proc.cfg.j2
@ -0,0 +1 @@
+command[check_koschei_polling_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-polling -c 1:1
--- a/roles/nagios_client/templates/check_koschei_resolver_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_koschei_resolver_proc.cfg.j2
@ -0,0 +1 @@
+# NOTE(review): "koschei-resolve" looks like the service name cut to 15 characters, presumably because check_procs -C matches the length-limited process command name -- confirm before "fixing" the spelling.
+command[check_koschei_resolver_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-resolve -c 1:1
--- a/roles/nagios_client/templates/check_koschei_scheduler_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_koschei_scheduler_proc.cfg.j2
@ -0,0 +1 @@
+# NOTE(review): "koschei-schedul" looks like the service name cut to 15 characters, presumably because check_procs -C matches the length-limited process command name -- confirm before "fixing" the spelling.
+command[check_koschei_scheduler_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-schedul -c 1:1
--- a/roles/nagios_client/templates/check_koschei_watcher_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_koschei_watcher_proc.cfg.j2
@ -0,0 +1 @@
+command[check_koschei_watcher_proc]={{ libdir }}/nagios/plugins/check_procs -s RSD -u koschei -C koschei-watcher -c 1:1
--- a/roles/nagios_client/templates/check_lock.cfg.j2
+++ b/roles/nagios_client/templates/check_lock.cfg.j2
@ -0,0 +1 @@
+command[check_lock]={{ libdir }}/nagios/plugins/check_lock
--- a/roles/nagios_client/templates/check_lock_file_age.cfg.j2
+++ b/roles/nagios_client/templates/check_lock_file_age.cfg.j2
@ -0,0 +1 @@
+command[check_lock_file_age]={{ libdir }}/nagios/plugins/check_lock_file_age -w 1 -c 5 -f /var/lock/fedora-ca/lock
--- a/roles/nagios_client/templates/check_memcache.cfg.j2
+++ b/roles/nagios_client/templates/check_memcache.cfg.j2
@ -0,0 +1,2 @@
+command[check_memcache]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -a '/usr/bin/memcached' -u memcached
+command[check_memcache_connect]=/usr/lib64/nagios/plugins/check_memcache_connect
--- a/roles/nagios_client/templates/check_merged_file_age.cfg.j2
+++ b/roles/nagios_client/templates/check_merged_file_age.cfg.j2
@ -0,0 +1 @@
+command[check_merged_file_age]=/usr/lib64/nagios/plugins/check_file_age -w 120 -c 300 /var/log/merged/messages.log
--- a/roles/nagios_client/templates/check_mirrorlist_cache.cfg.j2
+++ b/roles/nagios_client/templates/check_mirrorlist_cache.cfg.j2
@ -0,0 +1 @@
+command[check_mirrorlist_cache]={{ libdir }}/nagios/plugins/check_file_age -w 14400 -c 129600 -f /var/lib/mirrormanager/mirrorlist_cache.pkl
--- a/roles/nagios_client/templates/check_mysql.cfg.j2
+++ b/roles/nagios_client/templates/check_mysql.cfg.j2
@ -0,0 +1 @@
+command[check_mysql_backup]={{ libdir }}/nagios/plugins/check_file_age -w 86400 -c 129600 -f /backups/fpo-mediawiki-latest.xz
--- a/roles/nagios_client/templates/check_openvpn_link.cfg.j2
+++ b/roles/nagios_client/templates/check_openvpn_link.cfg.j2
@ -0,0 +1 @@
+command[check_openvpn_link]={{ libdir }}/nagios/plugins/check_ping -H 192.168.1.41 -w 375.0,20% -c 500,60%
--- a/roles/nagios_client/templates/check_osbs.cfg.j2
+++ b/roles/nagios_client/templates/check_osbs.cfg.j2
@ -0,0 +1,2 @@
+command[check_osbs_builds]={{ libdir }}/nagios/plugins/check_osbs_builds.py
+command[check_osbs_api]={{ libdir }}/nagios/plugins/check_osbs_api.py
--- a/roles/nagios_client/templates/check_postfix_queue.cfg.j2
+++ b/roles/nagios_client/templates/check_postfix_queue.cfg.j2
@ -0,0 +1 @@
+command[check_postfix_queue]={{ libdir }}/nagios/plugins/check_postfix_queue -w {{ nrpe_check_postfix_queue_warn }} -c {{ nrpe_check_postfix_queue_crit }}
--- a/roles/nagios_client/templates/check_raid.cfg.j2
+++ b/roles/nagios_client/templates/check_raid.cfg.j2
@ -0,0 +1 @@
+command[check_raid]={{ libdir }}/nagios/plugins/check_raid.py
--- a/roles/nagios_client/templates/check_readonly_fs.cfg.j2
+++ b/roles/nagios_client/templates/check_readonly_fs.cfg.j2
@ -0,0 +1 @@
+command[check_readonly_fs]=/usr/lib64/nagios/plugins/check_readonly_fs
--- a/roles/nagios_client/templates/check_redis_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_redis_proc.cfg.j2
@ -0,0 +1 @@
+command[check_redis_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'redis-server' -u redis
--- a/roles/nagios_client/templates/check_supybot_fedmsg_plugin.cfg.j2
+++ b/roles/nagios_client/templates/check_supybot_fedmsg_plugin.cfg.j2
@ -0,0 +1 @@
+command[check_supybot_fedmsg_plugin]={{libdir}}/nagios/plugins/check_supybot_plugin -t fedmsg
--- a/roles/nagios_client/templates/check_swap.cfg.j2
+++ b/roles/nagios_client/templates/check_swap.cfg.j2
@ -0,0 +1 @@
+command[check_swap]={{ libdir }}/nagios/plugins/check_swap -w 15% -c 10%
--- a/roles/nagios_client/templates/check_testcloud.cfg.j2
+++ b/roles/nagios_client/templates/check_testcloud.cfg.j2
@ -0,0 +1 @@
+command[check_testcloud]={{ libdir }}/nagios/plugins/check_testcloud
--- a/roles/nagios_client/templates/check_unbound_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_unbound_proc.cfg.j2
@ -0,0 +1 @@
+command[check_unbound_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'unbound' -u unbound
--- a/roles/nagios_client/templates/check_varnish_proc.cfg.j2
+++ b/roles/nagios_client/templates/check_varnish_proc.cfg.j2
@ -0,0 +1 @@
+command[check_varnish_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:2 -C 'varnishd' -u varnish
--- a/roles/nagios_client/templates/nrpe.cfg.j2
+++ b/roles/nagios_client/templates/nrpe.cfg.j2
@ -0,0 +1,228 @@
+#############################################################################
+# Sample NRPE Config File 
+# Written by: Ethan Galstad (nagios@nagios.org)
+# 
+# Last Modified: 11-23-2007
+#
+# NOTES:
+# This is a sample configuration file for the NRPE daemon.  It needs to be
+# located on the remote host that is running the NRPE daemon, not the host
+# from which the check_nrpe client is being executed.
+#############################################################################
+
+
+# LOG FACILITY
+# The syslog facility that should be used for logging purposes.
+
+log_facility=daemon
+
+
+
+# PID FILE
+# The name of the file in which the NRPE daemon should write its process ID
+# number.  The file is only written if the NRPE daemon is started by the root
+# user and is running in standalone mode.
+
+pid_file=/var/run/nrpe/nrpe.pid
+
+
+
+# PORT NUMBER
+# Port number we should wait for connections on.
+# NOTE: This must be a non-privileged port (i.e. > 1024).
+# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
+
+server_port=5666
+
+
+
+# SERVER ADDRESS
+# Address that nrpe should bind to in case there are more than one interface
+# and you do not want nrpe to bind on all interfaces.
+# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
+
+#server_address=127.0.0.1
+
+
+
+# NRPE USER
+# This determines the effective user that the NRPE daemon should run as.  
+# You can either supply a username or a UID.
+# 
+# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
+
+nrpe_user=nrpe
+
+
+
+# NRPE GROUP
+# This determines the effective group that the NRPE daemon should run as.  
+# You can either supply a group name or a GID.
+# 
+# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
+
+nrpe_group=nrpe
+
+
+
+# ALLOWED HOST ADDRESSES
+# This is an optional comma-delimited list of IP address or hostnames 
+# that are allowed to talk to the NRPE daemon. Network addresses with a bit mask
+# (i.e. 192.168.1.0/24) are also supported. Hostname wildcards are not currently 
+# supported.
+#
+# Note: The daemon only does rudimentary checking of the client's IP
+# address.  I would highly recommend adding entries in your /etc/hosts.allow
+# file to allow only the specified host to connect to the port
+# you are running this daemon on.
+#
+# NOTE: This option is ignored if NRPE is running under either inetd or xinetd
+
+
+allowed_hosts=10.5.126.41,192.168.1.10,192.168.1.20,209.132.181.35
+ 
+
+
+# COMMAND ARGUMENT PROCESSING
+# This option determines whether or not the NRPE daemon will allow clients
+# to specify arguments to commands that are executed.  This option only works
+# if the daemon was configured with the --enable-command-args configure script
+# option.  
+#
+# *** ENABLING THIS OPTION IS A SECURITY RISK! *** 
+# Read the SECURITY file for information on some of the security implications
+# of enabling this variable.
+#
+# Values: 0=do not allow arguments, 1=allow command arguments
+
+dont_blame_nrpe=0
+
+
+
+# COMMAND PREFIX
+# This option allows you to prefix all commands with a user-defined string.
+# A space is automatically added between the specified prefix string and the
+# command line from the command definition.
+#
+# *** THIS EXAMPLE MAY POSE A POTENTIAL SECURITY RISK, SO USE WITH CAUTION! ***
+# Usage scenario: 
+# Execute restricted commands using sudo.  For this to work, you need to add
+# the nagios user to your /etc/sudoers.  An example entry for allowing
+# execution of the plugins might be:
+#
+# nagios          ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/
+#
+# This lets the nagios user run all commands in that directory (and only them)
+# without asking for a password.  If you do this, make sure you don't give
+# random users write access to that directory or its contents!
+
+# command_prefix=/usr/bin/sudo 
+
+
+
+# DEBUGGING OPTION
+# This option determines whether or not debugging messages are logged to the
+# syslog facility.
+# Values: 0=debugging off, 1=debugging on
+
+debug=0
+
+
+
+# COMMAND TIMEOUT
+# This specifies the maximum number of seconds that the NRPE daemon will
+# allow plugins to finish executing before killing them off.
+
+command_timeout=100
+
+
+
+# CONNECTION TIMEOUT
+# This specifies the maximum number of seconds that the NRPE daemon will
+# wait for a connection to be established before exiting. This is sometimes
+# seen where a network problem stops the SSL being established even though
+# all network sessions are connected. This causes the nrpe daemons to
+# accumulate, eating system resources. Do not set this too low.
+
+connection_timeout=300
+
+
+
+# WEAK RANDOM SEED OPTION
+# This directive allows you to use SSL even if your system does not have
+# a /dev/random or /dev/urandom (on purpose or because the necessary patches
+# were not applied). The random number generator will be seeded from a file
+# which is either a file pointed to by the environment variable $RANDFILE
+# or $HOME/.rnd. If neither exists, the pseudo random number generator will
+# be initialized and a warning will be issued.
+# Values: 0=only seed from /dev/[u]random, 1=also seed from weak randomness
+
+#allow_weak_random_seed=1
+
+
+
+# INCLUDE CONFIG FILE
+# This directive allows you to include definitions from an external config file.
+
+#include=<somefile.cfg>
+
+
+
+# INCLUDE CONFIG DIRECTORY
+# This directive allows you to include definitions from config files (with a
+# .cfg extension) in one or more directories (with recursion).
+
+include_dir=/etc/nrpe.d/
+
+
+
+# COMMAND DEFINITIONS
+# Command definitions that this daemon will run.  Definitions
+# are in the following format:
+#
+# command[<command_name>]=<command_line>
+#
+# When the daemon receives a request to return the results of <command_name>
+# it will execute the command specified by the <command_line> argument.
+#
+# Unlike Nagios, the command line cannot contain macros - it must be
+# typed exactly as it should be executed.
+#
+# Note: Any plugins that are used in the command lines must reside
+# on the machine that this daemon is running on!  The examples below
+# assume that you have plugins installed in a /usr/local/nagios/libexec
+# directory.  Also note that you will have to modify the definitions below
+# to match the argument format the plugins expect.  Remember, these are
+# examples only!
+
+
+# The following examples use hardcoded command arguments...
+
+command[check_users]={{ libdir }}/nagios/plugins/check_users -w 5 -c 10
+command[check_load]={{ libdir }}/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
+command[check_hda1]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
+{% if inventory_hostname not in groups['zombie-infested'] %}
+command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 5 -c 10 -s Z
+{% else %}
+# This host is prone to Zombies and we do not care or want to alert on it so we make the limits very high
+command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 50000 -c 100000 -s Z
+{% endif %}
+command[check_total_procs]={{ libdir }}/nagios/plugins/check_procs -w {{ nrpe_procs_warn }} -c {{ nrpe_procs_crit }}
+
+
+# The following examples allow user-supplied arguments and can
+# only be used if the NRPE daemon was compiled with support for 
+# command arguments *AND* the dont_blame_nrpe directive in this
+# config file is set to '1'.  This poses a potential security risk, so
+# make sure you read the SECURITY file before doing this.
+
+#command[check_users]=/usr/lib64/nagios/plugins/check_users -w $ARG1$ -c $ARG2$
+#command[check_load]=/usr/lib64/nagios/plugins/check_load -w $ARG1$ -c $ARG2$
+#command[check_disk]=/usr/lib64/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
+#command[check_procs]=/usr/lib64/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
+
+
+# NEVER ADD ANYTHING HERE - ANY ENTRIES TO NRPE SHOULD BE in .cfg files in /etc/nrpe.d/
+
+# NEVER NEVER NEVER
+#
--- a/roles/nagios_server/README.rst
+++ b/roles/nagios_server/README.rst
@ -0,0 +1,78 @@
+===================================
+ Nagios 4 Configuration for Fedora
+===================================
+
+The Fedora Infrastructure Nagios is built on a set of configurations
+originally written for Nagios 2 and then upgraded over time to Nagios
+3 and then 4.08. With additional changes made in the 4.2 series of
+Nagios this needed a better rewrite as various parts came from
+pre-puppet and then various puppet modules added on top. 
+
+In order to get this rewrite done, we will use as much of the original
+layout of the Fedora ansible nagios module but with rewrites to better
+match current Nagios configurations so that it can be maintained.
+
+Role directory layout
+=====================
+The original layout branched out from 
+
+  roles/nagios/client/
+  roles/nagios/server/
+
+With the usual trees below this. This breaks ansible best practices
+and how most new modules are set up so the rewrite uses:
+
+  roles/nagios_client/
+  roles/nagios_server/
+
+=====================
+ Nagios Server Files
+=====================
+
+The Nagios Server Files require a large layout change. The original
+Nagios system used multiple independent modes and files which caused
+problems when hosts were removed. The new system will use hosts set up
+from the Fedora Ansible Inventory with hostgroups set up to match
+groups.
+
+  roles/nagios_server/{files,handlers,tasks,templates}
+
+  r.../n.../files/httpd ==> /etc/httpd/conf.d files
+  r.../n.../files/nagios ==> /etc/nagios/ files
+  r.../n.../files/nagios/commands      command files
+  r.../n.../files/nagios/hosts         host files
+  r.../n.../files/nagios/hostgroups    groups made from hosts
+  r.../n.../files/nagios/services      services
+  r.../n.../files/nagios/servicegroups groups made from services
+  r.../n.../files/nagios/contacts      files for people
+  r.../n.../files/nagios/contactgroups groups made from contacts
+      
+  similar layout for templates
+  handlers has the ways to restart and check configuration
+  tasks has the main rules for building stuff.
+
+===================
+Nagios Module Steps
+===================
+
+1. Check to see if the nagios user is configured. Someone years ago
+   chose that our monitoring uses UID/GID 420. Har Har.
+   Setup any other groups and permissions
+2. Install the needed packages for the server.
+3. Setup the directories on the server
+    /etc/nagios/{child}
+4. Synchronise over the static files
+    /etc/nagios/commands/
+    /etc/nagios/services/
+    /etc/nagios/servicegroups/
+    /etc/nagios/contacts/
+    /etc/nagios/contactgroups/
+    /usr/lib64/nagios/plugins/
+    /usr/local/bin
+    /usr/share/nagios/html/
+5. Build template files
+    /etc/nagios/commands/
+    /etc/nagios/hosts/{ansible-inventory, ansible-vars, other}
+    /etc/nagios/hostgroups/
+6. Fix selinux policy
+7. Restart services
--- a/roles/nagios_server/files/httpd/nagios.conf
+++ b/roles/nagios_server/files/httpd/nagios.conf
@ -0,0 +1,36 @@
+# noc1
+ScriptAlias /nagios/cgi-bin/ /usr/lib64/nagios/cgi-bin/
+
+# noc2
+ScriptAlias /nagios-external/cgi-bin/ /usr/lib64/nagios/cgi-bin/
+
+# test
+ScriptAlias /nagios-just-a-test/cgi-bin/ /usr/lib64/nagios/cgi-bin/
+
+ScriptAlias	/tac.cgi	/usr/lib64/nagios/cgi-bin/tac.cgi
+
+<Location />
+  AuthName "Nagios GSSAPI Login"
+  GssapiCredStore keytab:/etc/krb5.HTTP_admin.fedoraproject.org.keytab
+  AuthType GSSAPI
+  # This is off because Apache (and thus mod_auth_gssapi) doesn't know this is proxied over TLS
+  GssapiSSLonly Off
+  GssapiLocalName on
+  Require valid-user
+</Location>
+
+<Location ~ "/(nagios|nagios-external|nagios-just-a-test)/cgi-bin/">
+  Options ExecCGI
+</Location>
+
+<Directory "/usr/share/nagios/html">
+  Options None
+</Directory>
+
+Alias /nagios /usr/share/nagios/html/
+
+# This will only affect noc2 because the proxies only forward -external to it.
+Alias /nagios-external /usr/share/nagios/html/
+
+# Test -- must use the same URL prefix as the /nagios-just-a-test/cgi-bin/
+# ScriptAlias and the cgi-bin <Location> match in this file, otherwise the
+# HTML pages and the CGIs they link to end up under different prefixes.
+Alias /nagios-just-a-test /usr/share/nagios/html/
--- a/roles/nagios_server/files/nagios/commands/bzr.cfg
+++ b/roles/nagios_server/files/nagios/commands/bzr.cfg
@ -0,0 +1,8 @@
+# 'check_bzr' command definition
+# I'd like this to actually interact with BZR, but I can't find any 
+# proper documentation on the protocol to craft send/expect/quit 
+# strings.
+define command{
+        command_name    check_bzr
+        command_line    $USER1$/check_tcp -H $HOSTADDRESS$ -p 4155
+}
--- a/roles/nagios_server/files/nagios/commands/disk.cfg
+++ b/roles/nagios_server/files/nagios/commands/disk.cfg
@ -0,0 +1,15 @@
+define command {
+        command_name    check_by_ssh_check_raid
+        command_line    $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_raid.py"
+}
+
+define command {
+        command_name    check_by_ssh_check_disk
+        command_line    $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_disk -w $ARG1$% -c $ARG2$% -p $ARG3$"
+}
+
+# 'check_postgres_conns' command definition
+define command{
+        command_name    check_postgres_conns
+        command_line    $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_procs -u postgres -w $ARG1$ -c $ARG2$ -a $ARG3$"
+}
--- a/roles/nagios_server/files/nagios/commands/dns.cfg
+++ b/roles/nagios_server/files/nagios/commands/dns.cfg
@ -0,0 +1,11 @@
+# 'check_dns' command definition
+define command{
+        command_name    check_dns
+        command_line    $USER1$/check_dns -H www.yahoo.com -s $HOSTADDRESS$
+        }
+
+# 'check_dns_fpo' command definition
+define command{
+        command_name    check_dns_fpo
+        command_line    $USER1$/check_dns -t 30 -H fedoraproject.org -A -s $HOSTADDRESS$
+        }
--- a/roles/nagios_server/files/nagios/commands/git.cfg
+++ b/roles/nagios_server/files/nagios/commands/git.cfg
@ -0,0 +1,8 @@
+# 'check_git' command definition
+# I'd like this to actually interact with GIT, but I can't find any 
+# proper documentation on the protocol to craft send/expect/quit 
+# strings.
+define command{
+        command_name    check_git
+        command_line    $USER1$/check_tcp -H $HOSTADDRESS$ -p 9418
+}
--- a/roles/nagios_server/files/nagios/commands/httpd.cfg
+++ b/roles/nagios_server/files/nagios/commands/httpd.cfg
@ -0,0 +1,79 @@
+##
+## This file has the commands to check and restart general httpd services
+## and websites.
+##
+
+################################################################################
+# COMMAND DEFINITIONS
+#
+# SYNTAX:
+#
+#	define command{
+#               template      <templatename>
+#		name          <objectname>
+#               command_name  <commandname>
+#               command_line  <commandline>
+#               }
+#
+# WHERE:
+#
+# <templatename> = object name of another command definition that should be
+#                  used as a template for this definition (optional)
+# <objectname>   = object name of command definition, referenced by other
+#                  command definitions that use it as a template (optional)
+# <commandname>  = name of the command, as recognized/used by Nagios
+# <commandline>  = command line
+#
+################################################################################
+
+# 'reload httpd'
+define command {
+       command_name    restart_httpd
+       command_line    $USER1$/restart_httpd $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$ $HOSTADDRESS$ "$HOSTALIAS$" "$SERVICEDESC$" "$SERVICESTATE$"
+}
+
+
+#
+# 'check_website_publiclist' command definition
+define command{
+        command_name    check_website_publiclist
+        command_line    $USER1$/check_http -w 60 -c 80 -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
+}
+
+# 'check_website' command definition
+define command{
+        command_name    check_website
+        command_line    $USER1$/check_http -w 30 -c 40 -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
+}
+
+define command{
+        command_name    check_website_ppc
+        command_line    $USER1$/check_http -w 300 -c 400 -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
+}
+
+# 'check_website_ssl' command definition
+# HTTPS variant of check_website: $ARG1$ = virtual host name, $ARG2$ = URL
+# path, $ARG3$ = string expected in the response. $ARG2$ is quoted, as in the
+# non-SSL commands, so paths containing '&', '?' or ';' are passed intact.
+define command{
+        command_name    check_website_ssl
+        command_line    $USER1$/check_http -w 30 -c 40 --ssl -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
+}
+
+define command{
+       command_name    check_ssl_cert
+       command_line    $USER1$/check_http -I $HOSTADDRESS$ -H $ARG1$ -C $ARG2$
+}
+
+# 'check_website_publiclist_ssl' command definition
+# HTTPS variant of check_website_publiclist: $ARG1$ = virtual host name,
+# $ARG2$ = URL path, $ARG3$ = string expected in the response. $ARG2$ is
+# quoted so paths containing shell metacharacters are passed intact.
+define command{
+        command_name    check_website_publiclist_ssl
+        command_line    $USER1$/check_http -w 40 -c 60 --ssl -I $HOSTADDRESS$ -H $ARG1$ -u "$ARG2$" -s "$ARG3$"
+}
+
+# 'check_http' command definition
+define command{
+        command_name    check_http
+        command_line    $USER1$/check_http -H $HOSTADDRESS$
+}
+
+# 'check_https' command definition
+define command{
+        command_name    check_https
+        command_line    $USER1$/check_http -H $HOSTADDRESS$ --ssl
+}
--- a/roles/nagios_server/files/nagios/commands/koji.cfg
+++ b/roles/nagios_server/files/nagios/commands/koji.cfg
@ -0,0 +1,29 @@
+################################################################################
+# COMMAND DEFINITIONS
+#
+# SYNTAX:
+#
+#	define command{
+#               template      <templatename>
+#		name          <objectname>
+#               command_name  <commandname>
+#               command_line  <commandline>
+#               }
+#
+# WHERE:
+#
+# <templatename> = object name of another command definition that should be
+#                  used as a template for this definition (optional)
+# <objectname>   = object name of command definition, referenced by other
+#                  command definitions that use it as a template (optional)
+# <commandname>  = name of the command, as recognized/used by Nagios
+# <commandline>  = command line
+#
+################################################################################
+
+# 'check_koji'
+define command{
+        command_name    check_koji
+        command_line    $USER1$/check_koji
+}
+
--- a/roles/nagios_server/files/nagios/commands/local.cfg
+++ b/roles/nagios_server/files/nagios/commands/local.cfg
@ -0,0 +1,36 @@
+# 'check_local_disk' command definition
+define command{
+        command_name    check_local_disk
+        command_line    $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
+        }
+
+# 'check_local_load' command definition
+define command{
+        command_name    check_local_load
+        command_line    $USER1$/check_load -w $ARG1$ -c $ARG2$
+        }
+
+# 'check_local_procs' command definition
+define command{
+        command_name    check_local_procs
+        command_line    $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
+        }
+
+# 'check_local_users' command definition
+define command{
+        command_name    check_local_users
+        command_line    $USER1$/check_users -w $ARG1$ -c $ARG2$
+        }
+
+# 'check_local_swap' command definition
+define command{
+	command_name	check_local_swap
+	command_line	$USER1$/check_swap -w $ARG1$ -c $ARG2$
+	}
+
+# 'check_local_mrtgtraf' command definition
+define command{
+	command_name	check_local_mrtgtraf
+	command_line	$USER1$/check_mrtgtraf -F $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$
+	}
+
--- a/roles/nagios_server/files/nagios/commands/misc.cfg
+++ b/roles/nagios_server/files/nagios/commands/misc.cfg
@ -0,0 +1,96 @@
+################################################################################
+# COMMAND DEFINITIONS
+#
+# SYNTAX:
+#
+#	define command{
+#               template      <templatename>
+#		name          <objectname>
+#               command_name  <commandname>
+#               command_line  <commandline>
+#               }
+#
+# WHERE:
+#
+# <templatename> = object name of another command definition that should be
+#                  used as a template for this definition (optional)
+# <objectname>   = object name of command definition, referenced by other
+#                  command definitions that use it as a template (optional)
+# <commandname>  = name of the command, as recognized/used by Nagios
+# <commandline>  = command line
+#
+################################################################################
+
+define command{
+        command_name    true
+        command_line    /bin/true
+}
+
+define command{
+    command_name check_dummy
+    command_line $USER1$/check_dummy $ARG1$ $ARG2$
+}
+
+# 'check_tape'
+define command{
+        command_name    check_tape
+        command_line    $USER1$/check_tape
+}
+
+# 'check_ftp' command definition
+define command{
+        command_name    check_ftp
+        command_line    $USER1$/check_ftp -H $HOSTADDRESS$
+        }
+
+
+# 'check_hpjd' command definition
+define command{
+        command_name    check_hpjd
+        command_line    $USER1$/check_hpjd -H $HOSTADDRESS$ -C public
+        }
+
+# 'check_snmp' command definition
+define command{
+        command_name    check_snmp
+        command_line    $USER1$/check_snmp -H $HOSTADDRESS$ $ARG1$
+        }
+
+
+# 'check_nntp' command definition
+define command{
+        command_name    check_nntp
+        command_line    $USER1$/check_nntp -H $HOSTADDRESS$
+        }
+
+
+# 'check_telnet' command definition
+define command{
+        command_name    check_telnet
+        command_line    $USER1$/check_tcp -H $HOSTADDRESS$ -p 23
+        }
+
+# 'check_dhcp' command definition
+define command{
+	command_name	check_dhcp
+	command_line	$USER1$/check_dhcp $ARG1$
+	}
+
+# 'check_pop' command definition
+define command{
+        command_name    check_pop
+        command_line    $USER1$/check_pop -H $HOSTADDRESS$
+        }
+
+# 'check_imap' command definition
+define command{
+        command_name    check_imap
+        command_line    $USER1$/check_imap -H $HOSTADDRESS$ $ARG1$
+        }
+
+# 'check_nt' command definition
+define command{
+	command_name	check_nt
+	command_line	$USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$
+	}
+
--- a/roles/nagios_server/files/nagios/commands/notify.cfg
+++ b/roles/nagios_server/files/nagios/commands/notify.cfg
@ -0,0 +1,87 @@
+################################################################################
+#
+# SAMPLE NOTIFICATION COMMANDS
+#
+# These are some example notification commands.  They may or may not work on
+# your system without modification.  As an example, some systems will require 
+# you to use "/usr/bin/mailx" instead of "/usr/bin/mail" in the commands below.
+#
+################################################################################
+
+# 'host-notify-by-email' command definition
+define command{
+	command_name	host-notify-by-email
+	command_line	/usr/bin/printf "%b" "***** Nagios  *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\nSource: $$(hostname)\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$
+	}
+
+# 'notify-service-by-email' command definition
+define command{
+	command_name	notify-service-by-email
+	command_line	/usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /usr/bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
+	}
+
+# 'notify-by-epager' command definition
+define command{
+       command_name    notify-by-epager
+       command_line    /usr/bin/printf "%b" "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nInfo: $SERVICEOUTPUT$\nSource: $$(hostname -s)\nDate: $LONGDATETIME$" | /bin/mail -s "$NOTIFICATIONTYPE$: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$" $CONTACTPAGER$
+       }
+
+
+# 'host-notify-by-epager' command definition
+define command{
+	command_name	host-notify-by-epager
+	command_line	/usr/bin/printf "%b" "Host '$HOSTALIAS$' is $HOSTSTATE$\nInfo: $HOSTOUTPUT$\nSource: $$(hostname -s)\nTime: $LONGDATETIME$" | /bin/mail -s "$NOTIFICATIONTYPE$ alert - Host $HOSTNAME$ is $HOSTSTATE$" $CONTACTPAGER$
+	}
+
+# 'host-notify-by-ircbot' command definition
+define command{
+	command_name	host-notify-by-ircbot
+	command_line	/usr/bin/printf "%b" "#fedora-noc $NOTIFICATIONTYPE$ - $HOSTALIAS$ is $HOSTSTATE$: $HOSTOUTPUT$ ($$(hostname -s)) $HOSTACKAUTHOR$ $SERVICEACKAUTHOR$" | /usr/local/bin/irc-colorize.py | nc -w 1 value01 5050
+	}
+
+# 'notify-by-email' command definition
+define command{
+	command_name	notify-by-email
+	command_line	/usr/bin/printf "%b" "***** Nagios  *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\nSource: $$(hostname)\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
+	}
+
+# 'notify-by-ircbot' command definition
+define command{
+	command_name	notify-by-ircbot
+	command_line	/usr/bin/printf "%b" "#fedora-noc $NOTIFICATIONTYPE$ - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$: $SERVICEOUTPUT$ ($$(hostname -s)) $HOSTACKAUTHOR$ $SERVICEACKAUTHOR$" | /usr/local/bin/irc-colorize.py | nc -w 1 value01 5050
+	}
+
+# 'host-notify-by-fedmsg' command definition
+define command{
+	command_name	host-notify-by-fedmsg
+    command_line    /usr/bin/echo '{"type": "$NOTIFICATIONTYPE$", "host": "$HOSTALIAS$", "state": "$HOSTSTATE$", "output": "$HOSTOUTPUT$", "host_ack_author": "$HOSTACKAUTHOR$", "service_ack_author": "$SERVICEACKAUTHOR$"}' | fedmsg-logger --cert-prefix nagios --modname nagios --topic host.state.change --json-input
+	}
+
+# 'notify-by-epager' is already defined earlier in this file. Nagios flags a
+# second definition with the same command_name as a duplicate during config
+# verification, so the byte-identical copy that was here has been removed.
+
+
+# 'notify-by-fedmsg' command definition
+define command{
+	command_name	notify-by-fedmsg
+    command_line    /usr/bin/echo '{"type": "$NOTIFICATIONTYPE$", "host": "$HOSTALIAS$", "state": "$SERVICESTATE$", "service": "$SERVICEDESC$", "output": "$SERVICEOUTPUT$", "host_ack_author": "$HOSTACKAUTHOR$", "service_ack_author": "$SERVICEACKAUTHOR$"}' | fedmsg-logger --cert-prefix nagios --modname nagios --topic service.state.change --json-input
+	}
+
+# 'notify-by-xmpp' command definition
+define command{
+	command_name	notify-by-xmpp
+	command_line	/usr/local/bin/xmppsend -a /etc/nagios/private/xmppnagios.ini  "Service: $SERVICEDESC$\nHost: $HOSTNAME$\nInfo: $SERVICEOUTPUT$\nDate: $LONGDATETIME$"  $CONTACTEMAIL$
+	}
+
+
+# 'host-notify-by-xmpp' command definition
+define command{
+	command_name	host-notify-by-xmpp
+	command_line	/usr/local/bin/xmppsend -a /etc/nagios/private/xmppnagios.ini  "Host '$HOSTALIAS$' is $HOSTSTATE$\nInfo: $HOSTOUTPUT$\nDate: $LONGDATETIME$"   $CONTACTEMAIL$
+	}
+
+
+
--- a/roles/nagios_server/files/nagios/commands/nrpe.cfg
+++ b/roles/nagios_server/files/nagios/commands/nrpe.cfg
@ -0,0 +1,17 @@
+# 'test nrpe'
+define command{
+        command_name    test_nrpe
+        command_line    $USER1$/check_nrpe -t 30 -H $HOSTADDRESS$
+
+}
+# 'check by nrpe'
+define command{
+        command_name    check_by_nrpe
+        command_line    $USER1$/check_nrpe -t 30 -H $HOSTADDRESS$ -c $ARG1$
+}
+
+# 'check-host-alive-nrpe' is better for hosts that are on vpn.
+define command{
+        command_name    check-host-alive-nrpe
+        command_line    $USER1$/check_nrpe -t 30 -H $HOSTADDRESS$
+        }
--- a/roles/nagios_server/files/nagios/commands/perfdata.cfg
+++ b/roles/nagios_server/files/nagios/commands/perfdata.cfg
@ -0,0 +1,26 @@
+################################################################################
+#
+# SAMPLE PERFORMANCE DATA COMMANDS
+#
+# These are sample performance data commands that can be used to send performance
+# data output to two text files (one for hosts, another for services).  If you
+# plan on simply writing performance data out to a file, consider using the 
+# host_perfdata_file and service_perfdata_file options in the main config file.
+#
+################################################################################
+
+
+# 'process-host-perfdata' command definition
+define command{
+	command_name	process-host-perfdata
+	command_line	/usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /var/log/nagios/host-perfdata.out
+	}
+
+
+# 'process-service-perfdata' command definition
+define command{
+	command_name	process-service-perfdata
+	command_line	/usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /var/log/nagios/service-perfdata.out
+	}
+
+
--- a/roles/nagios_server/files/nagios/commands/ping.cfg
+++ b/roles/nagios_server/files/nagios/commands/ping.cfg
@ -0,0 +1,31 @@
+# This command checks to see if a host is "alive" by pinging it
+# The check must result in 100% packet loss or a 5 second (5000ms) round trip
+# average time to produce a critical error.
+# Note: Five ICMP echo packets are sent (determined by the '-p 5' argument)
+
+# 'check-host-alive' command definition
+define command{
+        command_name    check-host-alive
+        command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
+        }
+
+define command{
+        command_name    check-host-alive4
+        command_line    $USER1$/check_ping -4 -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 2
+        }
+
+define command{
+        command_name    check-host-alive6
+        command_line    $USER1$/check_ping -6 -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 2
+        }
+
+# 'check_ping4' and 'check_ping6' command definitions
+define command{
+        command_name    check_ping4
+        command_line    $USER1$/check_ping -4 -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
+        }
+
+define command{
+        command_name    check_ping6
+        command_line    $USER1$/check_ping -6 -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
+        }
--- a/roles/nagios_server/files/nagios/commands/postgres.cfg
+++ b/roles/nagios_server/files/nagios/commands/postgres.cfg
@ -0,0 +1,5 @@
+# 'pgsql'
+# Runs check_pgsql against $HOSTADDRESS$ for the database given in $ARG1$,
+# logging in as 'nagiosuser'.
+# NOTE(review): the password is a Jinja2 placeholder, but this file lives under
+# files/ -- confirm it is deployed through a templating step; otherwise the
+# literal '{{...}}' text is sent as the password.
+# NOTE(review): the password is passed on the command line, so it is presumably
+# visible in the process list on the Nagios server while the check runs.
+define command{
+	command_name	check_pgsql
+	command_line	$USER1$/check_pgsql -H $HOSTADDRESS$ -d $ARG1$ -p '{{nagios_db_user_password}}' --logname 'nagiosuser'
+}
--- a/roles/nagios_server/files/nagios/commands/rsyslog.cfg
+++ b/roles/nagios_server/files/nagios/commands/rsyslog.cfg
@ -0,0 +1,28 @@
+################################################################################
+# COMMAND DEFINITIONS
+#
+# SYNTAX:
+#
+#	define command{
+#               template      <templatename>
+#		name          <objectname>
+#               command_name  <commandname>
+#               command_line  <commandline>
+#               }
+#
+# WHERE:
+#
+# <templatename> = object name of another command definition that should be
+#                  used as a template for this definition (optional)
+# <objectname>   = object name of command definition, referenced by other
+#                  command definitions that use it as a template (optional)
+# <commandname>  = name of the command, as recognized/used by Nagios
+# <commandline>  = command line
+#
+################################################################################
+
+
+define command {
+        command_name    restart_rsyslog
+        command_line    $USER1$/restart_rsyslog $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$ $HOSTADDRESS$ "$HOSTALIAS$" "$SERVICEDESC$" "$SERVICESTATE$"
+}
--- a/roles/nagios_server/files/nagios/commands/smtp.cfg
+++ b/roles/nagios_server/files/nagios/commands/smtp.cfg
@ -0,0 +1,12 @@
+# 'check_smtp' command definition
+define command{
+        command_name    check_smtp
+        command_line    $USER1$/check_smtp -H $HOSTADDRESS$
+        }
+
+
+# 'check_email_delivery' command definition
+# Arguments, as passed to check_email_delivery_epn:
+#   $ARG1$ = host (-H)           $ARG2$ = --mailto address
+#   $ARG3$ = --mailfrom address  $ARG4$ = --username
+#   $ARG5$ = --password          $ARG6$ = warning threshold (-w)
+#   $ARG7$ = critical threshold (-c)
+# NOTE(review): the password is supplied as a plain command-line argument, so
+# it also has to appear in the service definition that calls this command.
+define command{
+        command_name    check_email_delivery
+        command_line    $USER1$/check_email_delivery_epn -H $ARG1$ --mailto $ARG2$ --mailfrom $ARG3$ --username $ARG4$ --password $ARG5$ -w $ARG6$ -c $ARG7$
+}
--- a/roles/nagios_server/files/nagios/commands/ssh.cfg
+++ b/roles/nagios_server/files/nagios/commands/ssh.cfg
@ -0,0 +1,22 @@
+# 'check_ssh' command definition
+define command{
+	command_name	check_ssh
+	command_line	$USER1$/check_ssh -H $HOSTADDRESS$
+}
+
+
+define command {
+        command_name    check_by_ssh_check_raid
+        command_line    $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_raid.py"
+}
+
+define command {
+        command_name    check_by_ssh_check_disk
+        command_line    $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_disk -w $ARG1$% -c $ARG2$% -p $ARG3$"
+}
+
+# 'check_postgres_conns' command definition
+define command{
+        command_name    check_postgres_conns
+        command_line    $USER1$/check_by_ssh -t 30 -H $HOSTADDRESS$ "$USER1$/check_procs -u postgres -w $ARG1$ -c $ARG2$ -a $ARG3$"
+}
--- a/roles/nagios_server/files/nagios/commands/tcp.cfg
+++ b/roles/nagios_server/files/nagios/commands/tcp.cfg
@ -0,0 +1,6 @@
+
+# 'check_tcp' command definition
+define command{
+	command_name	check_tcp
+	command_line	$USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$
+}
--- a/roles/nagios_server/files/nagios/commands/testcloud.cfg
+++ b/roles/nagios_server/files/nagios/commands/testcloud.cfg
@ -0,0 +1,5 @@
+# 'check_testcloud'
+define command{
+        command_name    check_testcloud
+        command_line    $USER1$/check_testcloud
+}
--- a/roles/nagios_server/files/nagios/commands/udp.cfg
+++ b/roles/nagios_server/files/nagios/commands/udp.cfg
@ -0,0 +1,5 @@
+# 'check_udp' command definition
+define command{
+	command_name	check_udp
+	command_line	$USER1$/check_udp -H $HOSTADDRESS$ -p $ARG1$
+	}
--- a/roles/nagios_server/files/nagios/commands/unbound.cfg
+++ b/roles/nagios_server/files/nagios/commands/unbound.cfg
@ -0,0 +1,12 @@
+# 'check_unbound_80' command definition
+define command{
+        command_name    check_unbound_80
+        command_line    $USER1$/check_dig -H $HOSTADDRESS$ -w 5 -c 9 -p 80 -l $ARG1$ -A "+tcp"
+        }
+
+
+# 'check_unbound_443' command definition
+define command{
+        command_name    check_unbound_443
+        command_line    $USER1$/check_dig_ssl -H $HOSTADDRESS$ -w 5 -c 9 -p 443 -L $ARG1$ -l $ARG2$ -A "+tcp"
+        }
--- a/roles/nagios_server/files/nagios/configs/escalations.cfg
+++ b/roles/nagios_server/files/nagios/configs/escalations.cfg
@ -0,0 +1,22 @@
+# Host escalation applied to every host: from the second notification onward
+# (first_notification 2) and with no upper bound (last_notification 0), the
+# email, emergency and ircbot contact groups are notified every 60 time units
+# for DOWN, UNREACHABLE and RECOVERY states.
+# NOTE(review): both host_name and hostgroup_name are wildcarded; either one
+# alone presumably already matches every host -- confirm both are needed.
+define hostescalation{
+	host_name	*
+	hostgroup_name	*
+	contact_groups	fedora-sysadmin-email,fedora-sysadmin-emergency,fedora-sysadmin-ircbot
+	first_notification	2
+	last_notification	0
+	notification_interval	60
+	escalation_period	24x7
+	escalation_options	d,u,r
+}
+
+
+# Service escalation applied to every service on every host: from the second
+# notification onward (first_notification 2) and with no upper bound
+# (last_notification 0), the email, emergency and ircbot contact groups are
+# notified every 60 time units for WARNING, UNKNOWN, CRITICAL and RECOVERY.
+define serviceescalation{
+	host_name	*
+	service_description	*
+	contact_groups	fedora-sysadmin-email,fedora-sysadmin-emergency,fedora-sysadmin-ircbot
+	first_notification	2
+	last_notification	0
+	notification_interval	60
+	escalation_period	24x7
+	escalation_options	w,u,c,r
+}
--- a/roles/nagios_server/files/nagios/configs/minimal.cfg
+++ b/roles/nagios_server/files/nagios/configs/minimal.cfg
@ -0,0 +1,362 @@
+###############################################################################
+# MINIMAL.CFG
+#
+# MINIMALISTIC OBJECT CONFIG FILE (Template-Based Object File Format)
+#
+# Last Modified: 08-10-2005
+#
+#
+# NOTE: This config file is intended to be used to test a Nagios installation
+#       that has been compiled with support for the template-based object
+#       configuration files.
+#
+#       This config file is intended to serve as an *extremely* simple
+#       example of how you can create your object configuration file(s).
+#       If you're interested in more complex object configuration files for
+#       Nagios, look in the sample-config/template-object/ subdirectory of
+#       the distribution.
+#
+###############################################################################
+
+
+
+###############################################################################
+###############################################################################
+#
+# TIME PERIODS
+#
+###############################################################################
+###############################################################################
+
+# This defines a timeperiod where all times are valid for checks,
+# notifications, etc.  The classic "24x7" support nightmare. :-)
+# NOTE(review): timeperiods.cfg in this same directory also defines "24x7";
+# confirm that this sample file is never loaded together with it, since the
+# two definitions would presumably clash.
+
+define timeperiod{
+        timeperiod_name 24x7
+        alias           24 Hours A Day, 7 Days A Week
+        sunday          00:00-24:00
+        monday          00:00-24:00
+        tuesday         00:00-24:00
+        wednesday       00:00-24:00
+        thursday        00:00-24:00
+        friday          00:00-24:00
+        saturday        00:00-24:00
+        }
+
+
+
+
+###############################################################################
+###############################################################################
+#
+# COMMANDS
+#
+###############################################################################
+###############################################################################
+
+# This is a sample service notification command that can be used to send email 
+# notifications (about service alerts) to contacts.
+# 'notify-by-email' command definition
+define command{
+	command_name	notify-by-email
+	command_line	/usr/bin/printf "%b" "***** Nagios  *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$OUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ alert - $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
+	}
+
+
+# This is a sample host notification command that can be used to send email 
+# notifications (about host alerts) to contacts.
+
+define command{
+	command_name	host-notify-by-email
+	command_line	/usr/bin/printf "%b" "***** Nagios  *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $OUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$
+	}
+
+
+# Command to check to see if a host is "alive" (up) by pinging it.
+# Sends 2 IPv4 echo requests; warns at 300ms RTA or 99% loss, goes critical at
+# 500ms RTA or 100% loss.
+# NOTE(review): commands/ping.cfg defines a command with the same
+# command_name but different thresholds -- confirm only one of the two files
+# is ever loaded.
+
+define command{
+        command_name    check-host-alive
+        command_line    $USER1$/check_ping -4 -H $HOSTADDRESS$ -w 300,99% -c 500,100% -p 2
+        }
+
+
+# Generic command to check a device by pinging it
+
+define command{
+	command_name	check_ping
+	command_line	$USER1$/check_ping -4 -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
+	}
+
+
+# Command used to check disk space usage on local partitions
+
+define command{
+	command_name	check_local_disk
+	command_line	$USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
+	}
+
+
+# Command used to check the number of currently logged in users on the
+# local machine
+
+define command{
+	command_name	check_local_users
+	command_line	$USER1$/check_users -w $ARG1$ -c $ARG2$
+	}
+
+
+# Command to check the number of running processes on the local machine
+
+define command{
+	command_name	check_local_procs
+	command_line	$USER1$/check_procs -w $ARG1$ -c $ARG2$
+	}
+
+
+# Command to check the load on the local machine
+
+define command{
+	command_name	check_local_load
+	command_line	$USER1$/check_load -w $ARG1$ -c $ARG2$
+	}
+
+
+
+###############################################################################
+###############################################################################
+#
+# CONTACTS
+#
+###############################################################################
+###############################################################################
+
+# In this simple config file, a single contact will receive all alerts.
+# This assumes that you have an account (or email alias) called
+# "nagios-admin" on the local host.
+
+define contact{
+        contact_name                    nagios-admin
+        alias                           Nagios Admin
+        service_notification_period     24x7
+        host_notification_period        24x7
+        service_notification_options    w,u,c,r
+        host_notification_options       d,r
+        service_notification_commands   notify-by-email
+        host_notification_commands      host-notify-by-email
+        email                           admin@fedoraproject.org
+        }
+
+
+
+###############################################################################
+###############################################################################
+#
+# CONTACT GROUPS
+#
+###############################################################################
+###############################################################################
+
+# We only have one contact in this simple configuration file, so there is
+# no need to create more than one contact group.
+
+define contactgroup{
+        contactgroup_name       admins
+        alias                   Nagios Administrators
+        members                 nagios-admin
+        }
+
+
+
+###############################################################################
+###############################################################################
+#
+# HOSTS
+#
+###############################################################################
+###############################################################################
+
+# Generic host definition template - This is NOT a real host, just a template!
+
+define host{
+        name                            generic-host    ; The name of this host template
+        notifications_enabled           1       ; Host notifications are enabled
+        event_handler_enabled           1       ; Host event handler is enabled
+        flap_detection_enabled          1       ; Flap detection is enabled
+        failure_prediction_enabled      1       ; Failure prediction is enabled
+        process_perf_data               1       ; Process performance data
+        retain_status_information       1       ; Retain status information across program restarts
+        retain_nonstatus_information    1       ; Retain non-status information across program restarts
+        register                        0       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
+        }
+
+
+# Since this is a simple configuration file, we only monitor one host - the
+# local host (this machine).
+
+define host{
+        use                     generic-host            ; Name of host template to use
+        host_name               localhost
+        alias                   localhost
+        address                 127.0.0.1
+        check_command           check-host-alive
+        max_check_attempts      10
+        notification_interval   120
+        notification_period     24x7
+        notification_options    d,r
+        contact_groups  admins
+        }
+
+
+
+###############################################################################
+###############################################################################
+#
+# HOST GROUPS
+#
+###############################################################################
+###############################################################################
+
+# We only have one host in our simple config file, so there is no need to
+# create more than one hostgroup.
+
+define hostgroup{
+        hostgroup_name  test
+        alias           Test Servers
+        members         localhost
+        }
+
+
+
+###############################################################################
+###############################################################################
+#
+# SERVICES
+#
+###############################################################################
+###############################################################################
+
+# Generic service definition template - This is NOT a real service, just a template!
+
+define service{
+        name                            generic-service ; The 'name' of this service template
+        active_checks_enabled           1       ; Active service checks are enabled
+        passive_checks_enabled          1       ; Passive service checks are enabled/accepted
+        parallelize_check               1       ; Active service checks should be parallelized (disabling this can lead to major performance problems)
+        obsess_over_service             1       ; We should obsess over this service (if necessary)
+        check_freshness                 0       ; Default is to NOT check service 'freshness'
+        notifications_enabled           1       ; Service notifications are enabled
+        event_handler_enabled           1       ; Service event handler is enabled
+        flap_detection_enabled          1       ; Flap detection is enabled
+        failure_prediction_enabled      1       ; Failure prediction is enabled
+        process_perf_data               1       ; Process performance data
+        retain_status_information       1       ; Retain status information across program restarts
+        retain_nonstatus_information    1       ; Retain non-status information across program restarts
+        register                        0       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
+        }
+
+
+# Define a service to "ping" the local machine
+
+define service{
+        use                             generic-service         ; Name of service template to use
+        host_name                       localhost
+        service_description             PING
+        is_volatile                     0
+        check_period                    24x7
+        max_check_attempts              4
+        normal_check_interval           5
+        retry_check_interval            1
+        contact_groups                  admins
+	notification_options		w,u,c,r
+        notification_interval           960
+        notification_period             24x7
+	check_command			check_ping!100.0,20%!500.0,60%
+        }
+
+
+# Define a service to check the disk space of the root partition
+# on the local machine.  Warning if < 20% free, critical if
+# < 10% free space on partition.
+
+define service{
+        use                             generic-service         ; Name of service template to use
+        host_name                       localhost
+        service_description             Root Partition
+        is_volatile                     0
+        check_period                    24x7
+        max_check_attempts              4
+        normal_check_interval           5
+        retry_check_interval            1
+        contact_groups                  admins
+	notification_options		w,u,c,r
+        notification_interval           960
+        notification_period             24x7
+	check_command			check_local_disk!20%!10%!/
+        }
+
+
+
+# Define a service to check the number of currently logged in
+# users on the local machine.  Warning if > 20 users, critical
+# if > 50 users.
+
+define service{
+        use                             generic-service         ; Name of service template to use
+        host_name                       localhost
+        service_description             Current Users
+        is_volatile                     0
+        check_period                    24x7
+        max_check_attempts              4
+        normal_check_interval           5
+        retry_check_interval            1
+        contact_groups                  admins
+	notification_options		w,u,c,r
+        notification_interval           960
+        notification_period             24x7
+	check_command			check_local_users!20!50
+        }
+
+
+# Define a service to check the number of currently running procs
+# on the local machine.  Warning if > 250 processes, critical if
+# > 400 processes.
+
+define service{
+        use                             generic-service         ; Name of service template to use
+        host_name                       localhost
+        service_description             Total Processes
+        is_volatile                     0
+        check_period                    24x7
+        max_check_attempts              4
+        normal_check_interval           5
+        retry_check_interval            1
+        contact_groups                  admins
+	notification_options		w,u,c,r
+        notification_interval           960
+        notification_period             24x7
+	check_command			check_local_procs!250!400
+        }
+
+
+
+# Define a service to check the load on the local machine. 
+
+define service{
+        use                             generic-service         ; Name of service template to use
+        host_name                       localhost
+        service_description             Current Load
+        is_volatile                     0
+        check_period                    24x7
+        max_check_attempts              4
+        normal_check_interval           5
+        retry_check_interval            1
+        contact_groups                  admins
+	notification_options		w,u,c,r
+        notification_interval           960
+        notification_period             24x7
+	check_command			check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
+        }
+
+
+
+# EOF
--- a/roles/nagios_server/files/nagios/configs/nagios.cfg
+++ b/roles/nagios_server/files/nagios/configs/nagios.cfg
--- a/roles/nagios_server/files/nagios/configs/timeperiods.cfg
+++ b/roles/nagios_server/files/nagios/configs/timeperiods.cfg
@ -0,0 +1,135 @@
+###############################################################################
+# TIMEPERIODS.CFG - SAMPLE TIMEPERIOD DEFINITIONS
+#
+#
+# NOTES: This config file provides you with some example timeperiod definitions
+#        that you can reference in host, service, contact, and dependency
+#        definitions.
+#       
+#        You don't need to keep timeperiods in a separate file from your other
+#        object definitions.  This has been done just to make things easier to
+#        understand.
+#
+###############################################################################
+
+
+
+###############################################################################
+###############################################################################
+#
+# TIME PERIODS
+#
+###############################################################################
+###############################################################################
+
+
+define timeperiod{
+        timeperiod_name 24x7
+        alias           24 Hours A Day, 7 Days A Week
+        sunday          00:00-24:00
+        monday          00:00-24:00
+        tuesday         00:00-24:00
+        wednesday       00:00-24:00
+        thursday        00:00-24:00
+        friday          00:00-24:00
+        saturday        00:00-24:00
+}
+
+# Every day of the week, 00:00-04:00 and 13:00-24:00.
+# NOTE(review): the name says "16" but the ranges below add up to 15 hours a
+# day, which is what the alias states -- confirm which one is intended.
+define timeperiod{
+        timeperiod_name 16x7
+        alias           15 Hours a day, 7 days a week
+        sunday          00:00-04:00,13:00-24:00
+        monday          00:00-04:00,13:00-24:00
+        tuesday         00:00-04:00,13:00-24:00
+        wednesday       00:00-04:00,13:00-24:00
+        thursday        00:00-04:00,13:00-24:00
+        friday          00:00-04:00,13:00-24:00
+        saturday        00:00-04:00,13:00-24:00
+        }
+
+# Every day of the week, 00:00-14:00 and 22:00-24:00 (16 hours a day).
+# The alias previously read "15 Hours a day", which did not match these
+# ranges and was identical to the alias of another timeperiod in this file.
+define timeperiod{
+        timeperiod_name 16x7-AU
+        alias           16 Hours a day, 7 days a week (AU)
+        sunday          00:00-14:00,22:00-24:00
+        monday          00:00-14:00,22:00-24:00
+        tuesday         00:00-14:00,22:00-24:00
+        wednesday       00:00-14:00,22:00-24:00
+        thursday        00:00-14:00,22:00-24:00
+        friday          00:00-14:00,22:00-24:00
+        saturday        00:00-14:00,22:00-24:00
+        }
+
+
+# Members of sysadmin-main already get nagios messages
+define timeperiod{
+        timeperiod_name never
+        alias           Never
+        }
+
+# A second, identical "24x7" timeperiod was defined here.  It is disabled
+# because "24x7" is already defined at the top of this file and Nagios flags
+# duplicate timeperiod_name values during config verification.
+#define timeperiod{
+#        timeperiod_name 24x7
+#        alias           24 Hours A Day, 7 Days A Week
+#        sunday          00:00-24:00
+#        monday          00:00-24:00
+#        tuesday         00:00-24:00
+#        wednesday       00:00-24:00
+#        thursday        00:00-24:00
+#        friday          00:00-24:00
+#        saturday        00:00-24:00
+#        }
+
+
+# 'workhours' timeperiod definition
+define timeperiod{
+        timeperiod_name workhours
+        alias           Normal Work Hours
+        monday          09:00-17:00
+        tuesday         09:00-17:00
+        wednesday       09:00-17:00
+        thursday        09:00-17:00
+        friday          09:00-17:00
+        }
+
+
+# 'none' timeperiod definition
+define timeperiod{
+        timeperiod_name none
+        alias           No Time Is A Good Time
+        }
+
+# Some U.S. holidays
+# Note: The timeranges for each holiday are meant to *exclude* the holidays from being
+# treated as a valid time for notifications, etc.  You probably don't want your pager 
+# going off on New Year's.  Although your employer might... :-)
+define timeperiod{
+        name                    us-holidays
+        timeperiod_name         us-holidays
+        alias                   U.S. Holidays
+
+        january 1               00:00-00:00     ; New Years
+        monday -1 may           00:00-00:00     ; Memorial Day (last Monday in May)
+        july 4                  00:00-00:00     ; Independence Day
+        monday 1 september      00:00-00:00     ; Labor Day (first Monday in September)
+        thursday 4 november     00:00-00:00     ; Thanksgiving (4th Thursday in November)
+        december 25             00:00-00:00     ; Christmas
+        }
+
+
+# This defines a modified "24x7" timeperiod that covers every day of the
+# year, except for U.S. holidays (defined in the timeperiod above).
+define timeperiod{
+        timeperiod_name 24x7_sans_holidays
+        alias           24x7 Sans Holidays
+
+        use             us-holidays             ; Get holiday exceptions from other timeperiod
+
+        sunday          00:00-24:00
+        monday          00:00-24:00
+        tuesday         00:00-24:00
+        wednesday       00:00-24:00
+        thursday        00:00-24:00
+        friday          00:00-24:00
+        saturday        00:00-24:00
+        }
--- a/roles/nagios_server/files/nagios/contactgroups/bodhi.cfg
+++ b/roles/nagios_server/files/nagios/contactgroups/bodhi.cfg
@ -0,0 +1,5 @@
+define contactgroup {
+        contactgroup_name       bodhi
+        alias                   Bodhi Notifications
+        members                 bowlofeggs
+}
--- a/roles/nagios_server/files/nagios/contactgroups/build-sysadmin-email.cfg
+++ b/roles/nagios_server/files/nagios/contactgroups/build-sysadmin-email.cfg
@ -0,0 +1,5 @@
+#define contactgroup{
+#        contactgroup_name       build-sysadmin-email
+#        alias                   Build Sysadmin Email Contacts
+#        members kevin,aditya
+#        }
--- a/roles/nagios_server/files/nagios/contactgroups/fedora-sysadmin-email.cfg
+++ b/roles/nagios_server/files/nagios/contactgroups/fedora-sysadmin-email.cfg
@ -0,0 +1,5 @@
+define contactgroup{
+        contactgroup_name       fedora-sysadmin-email
+        alias                   Fedora Sysadmin Email Contacts
+	members admin,kevin,puiterwijkp,smooge,ausil,jcollie,nb,rigeld2,codeblock,hvivani
+        }
--- a/roles/nagios_server/files/nagios/contactgroups/fedora-sysadmin-ircbot.cfg
+++ b/roles/nagios_server/files/nagios/contactgroups/fedora-sysadmin-ircbot.cfg
@ -0,0 +1,5 @@
+define contactgroup{
+        contactgroup_name       fedora-sysadmin-ircbot
+        alias                   Fedora Sysadmin irc Contacts
+        members ircbot,fedmsg
+        }
--- a/roles/nagios_server/files/nagios/contactgroups/fedora-sysadmin-pager.cfg
+++ b/roles/nagios_server/files/nagios/contactgroups/fedora-sysadmin-pager.cfg
@ -0,0 +1,10 @@
+define contactgroup{
+    contactgroup_name       fedora-sysadmin-pager
+    alias                   Fedora Sysadmin Pager Contacts
+    members smoogep,kevinp,puiterwijkp
+}
+# Emergency contacts; the alias is kept distinct from the pager group's so
+# the two groups can be told apart wherever the alias is displayed.
+define contactgroup{
+    contactgroup_name       fedora-sysadmin-emergency
+    alias                   Fedora Sysadmin Emergency Contacts
+    members smooge-emergency,kevin-emergency,puiterwijk-emergency
+}
--- a/roles/nagios_server/files/nagios/contactgroups/null.cfg
+++ b/roles/nagios_server/files/nagios/contactgroups/null.cfg
@ -0,0 +1,5 @@
+define contactgroup{
+    contactgroup_name       null
+    alias                   null
+    members null
+}
--- a/Show more
+++ b/Show more
				`@ -0,0 +1 @@`
				`command[check_autocloud_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'python' -a 'autocloud_job.py' -u root`
				`@ -0,0 +1 @@`
				`command[check_cron]={{ libdir }}/nagios/plugins/check_procs -c 1:15 -C 'crond' -u root`
				`@ -0,0 +1 @@`
				`command[check_fedmsg_gateway_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-gateway' -u fedmsg`
				`@ -0,0 +1 @@`
				`command[check_fedmsg_hub_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-hub' -u fedmsg`
				`@ -0,0 +1 @@`
				`command[check_fedmsg_hub_procs_bugyou]={{ libdir }}/nagios/plugins/check_procs -c 3:3 -C 'fedmsg-hub' -u fedmsg`
				`@ -0,0 +1 @@`
				`command[check_fedmsg_irc_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-irc' -u fedmsg`
				`@ -0,0 +1 @@`
				`command[check_fedmsg_masher_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-hub' -u apache`
				`@ -0,0 +1 @@`
				`command[check_fedmsg_relay_proc]={{ libdir }}/nagios/plugins/check_procs -c 1:1 -C 'fedmsg-relay' -u fedmsg`
				`@ -0,0 +1 @@`
				`command[check_haproxy_conns]=/usr/lib64/nagios/plugins/check_haproxy_conns.py`
				`@ -0,0 +1 @@`
				`command[check_haproxy_mirrorlist]=/usr/lib64/nagios/plugins/check_haproxy_mirrorlist.py`