From 7ef93ded5dd346fa96eb5c9f9eed444345cb4d32 Mon Sep 17 00:00:00 2001
From: Kevin Fenzi
Date: Mon, 17 Jun 2013 21:13:31 +0000
Subject: [PATCH] First cut at moving over nrpe client scripts.

---
Note: the blob ids on the "index" lines of check_postfix_queue and
check_raid.py predate the review changes made to those two scripts.

 files/nagios/client/check_cron.cfg            |  1 +
 files/nagios/client/check_disk.cfg            |  1 +
 .../nagios/client/check_mirrorlist_cache.cfg  |  1 +
 files/nagios/client/check_postfix_queue.cfg   |  1 +
 files/nagios/client/check_raid.cfg            |  1 +
 files/nagios/client/check_swap.cfg            |  1 +
 .../nagios/client/scripts/check_postfix_queue | 63 ++++++++++++++++++
 files/nagios/client/scripts/check_raid.py     | 64 +++++++++++++++++++
 tasks/nagios_client.yml                       | 22 ++++++-
 9 files changed, 153 insertions(+), 2 deletions(-)
 create mode 100644 files/nagios/client/check_cron.cfg
 create mode 100644 files/nagios/client/check_disk.cfg
 create mode 100644 files/nagios/client/check_mirrorlist_cache.cfg
 create mode 100644 files/nagios/client/check_postfix_queue.cfg
 create mode 100644 files/nagios/client/check_raid.cfg
 create mode 100644 files/nagios/client/check_swap.cfg
 create mode 100755 files/nagios/client/scripts/check_postfix_queue
 create mode 100755 files/nagios/client/scripts/check_raid.py

diff --git a/files/nagios/client/check_cron.cfg b/files/nagios/client/check_cron.cfg
new file mode 100644
index 0000000000..e01f40799e
--- /dev/null
+++ b/files/nagios/client/check_cron.cfg
@@ -0,0 +1 @@
+command[check_cron]=/usr/lib64/nagios/plugins/check_procs -c 1:10 -C 'crond' -u root
diff --git a/files/nagios/client/check_disk.cfg b/files/nagios/client/check_disk.cfg
new file mode 100644
index 0000000000..fa235c9245
--- /dev/null
+++ b/files/nagios/client/check_disk.cfg
@@ -0,0 +1 @@
+command[check_disk_/]=/usr/lib64/nagios/plugins/check_disk -w 15% -c 10% -p /
diff --git a/files/nagios/client/check_mirrorlist_cache.cfg b/files/nagios/client/check_mirrorlist_cache.cfg
new file mode 100644
index 0000000000..458184825d
--- /dev/null
+++ b/files/nagios/client/check_mirrorlist_cache.cfg
@@ -0,0 +1 @@
+command[check_mirrorlist_cache]=/usr/lib64/nagios/plugins/check_file_age -w 86400 -c 129600 -f /var/lib/mirrormanager/mirrorlist_cache.pkl
diff --git a/files/nagios/client/check_postfix_queue.cfg b/files/nagios/client/check_postfix_queue.cfg
new file mode 100644
index 0000000000..5574698031
--- /dev/null
+++ b/files/nagios/client/check_postfix_queue.cfg
@@ -0,0 +1 @@
+command[check_postfix_queue]=/usr/lib64/nagios/plugins/check_postfix_queue -w 2 -c 5
diff --git a/files/nagios/client/check_raid.cfg b/files/nagios/client/check_raid.cfg
new file mode 100644
index 0000000000..5c15d450ce
--- /dev/null
+++ b/files/nagios/client/check_raid.cfg
@@ -0,0 +1 @@
+command[check_raid]=/usr/lib64/nagios/plugins/check_raid.py
diff --git a/files/nagios/client/check_swap.cfg b/files/nagios/client/check_swap.cfg
new file mode 100644
index 0000000000..bc4f19d5d5
--- /dev/null
+++ b/files/nagios/client/check_swap.cfg
@@ -0,0 +1 @@
+command[check_swap]=/usr/lib64/nagios/plugins/check_swap -w 15% -c 10%
diff --git a/files/nagios/client/scripts/check_postfix_queue b/files/nagios/client/scripts/check_postfix_queue
new file mode 100755
index 0000000000..44ab4445f9
--- /dev/null
+++ b/files/nagios/client/scripts/check_postfix_queue
@@ -0,0 +1,63 @@
+#!/bin/bash
+#
+# 19-07-2010
+# Author: Cherwin Nooitmeer
+#
+# nagios check: compare the number of mails in the postfix queue
+# against the warning (-w) and critical (-c) thresholds
+#
+
+# exit codes
+e_ok=0
+e_warning=1
+e_critical=2
+e_unknown=3
+
+# regular expression that matches queue IDs (e.g. D71EF7AC80F8)
+queue_id='^[A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9]'
+
+usage="Invalid command line usage"
+
+if [ -z "$1" ]; then
+    echo "$usage"
+    exit $e_unknown
+fi
+
+while getopts ":w:c:" options
+do
+    case $options in
+        w ) warning=$OPTARG ;;
+        c ) critical=$OPTARG ;;
+        * ) echo "$usage"
+            exit $e_unknown ;;
+    esac
+done
+
+# both thresholds have to be plain numbers, otherwise the comparisons
+# below fail and the check ends up reporting OK
+for threshold in "$warning" "$critical"
+do
+    case $threshold in
+        '' | *[!0-9]* ) echo "$usage"
+                        exit $e_unknown ;;
+    esac
+done
+
+# determine queue size
+qsize=$(mailq | grep -E -c "$queue_id")
+if [ -z "$qsize" ]
+then
+    echo "Unable to determine the mail queue size"
+    exit $e_unknown
+fi
+
+if [ "$qsize" -ge "$critical" ]; then
+    retval=$e_critical
+elif [ "$qsize" -ge "$warning" ]; then
+    retval=$e_warning
+else
+    retval=$e_ok
+fi
+
+echo "$qsize mail(s) in queue | mail_queue=$qsize"
+exit $retval
diff --git a/files/nagios/client/scripts/check_raid.py b/files/nagios/client/scripts/check_raid.py
new file mode 100755
index 0000000000..48cddd93d4
--- /dev/null
+++ b/files/nagios/client/scripts/check_raid.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+#
+# very simple python script to parse out /proc/mdstat
+# and give results for nagios to monitor
+#
+# exit status 0 (OK): no software raid, or every md device has all of its
+# members up; exit status 2 (CRITICAL): a device has a missing member
+#
+
+import sys
+
+
+def member_status(line):
+    """Return the "[UU_]" style member status field of a mdstat line.
+
+    The position of the field depends on the superblock version
+    ("blocks [2/2] [UU]" vs. "blocks super 1.2 [2/2] [UU]"), so find it
+    by shape instead of by column.  Returns "" if the line has none
+    (e.g. raid0 and linear devices).
+    """
+    for field in line.split():
+        if field[0] == '[' and field[-1] == ']':
+            flags = field[1:-1]
+            if flags and not flags.strip('U_'):
+                return field
+    return ""
+
+
+try:
+    mdstat_file = open('/proc/mdstat')
+    try:
+        mdstat = mdstat_file.read().split('\n')
+    finally:
+        mdstat_file.close()
+except IOError:
+    # seems we have no software raid on this machine; still print a line,
+    # nrpe reports a check that prints nothing as "Unable to read output"
+    print("No software raid found")
+    sys.exit(0)
+
+devices = []
+errors = []
+for i, line in enumerate(mdstat):
+    if line[0:2] != 'md':
+        continue
+    device = line.split()[0]
+    devices.append(device)
+    # slices instead of indexes: no IndexError if the file ends early
+    status = member_status("".join(mdstat[i + 1:i + 2]))
+    if "_" not in status:
+        continue
+    msg = "device=%s status=%s" % (device, status)
+    # a rebuild in progress shows up as "[==>......]  recovery = ..."
+    progress = "".join(mdstat[i + 2:i + 3]).split()
+    if progress and progress[0][0] == '[':
+        msg = msg + " rebuild=%s" % progress[0]
+    errors.append(msg)
+
+if not errors:
+    print("DEVICES %s OK" % " ".join(devices))
+    sys.exit(0)
+else:
+    print(", ".join(errors))
+    sys.exit(2)
diff --git a/tasks/nagios_client.yml b/tasks/nagios_client.yml
index 20a050b439..21cd848a5c 100644
--- a/tasks/nagios_client.yml
+++ b/tasks/nagios_client.yml
@@ -17,6 +17,12 @@
   tags:
   - packages
 
+- name: install local nrpe check scripts that are not packaged
+  copy: src=$files/nagios/client/scripts/$item dest=/usr/lib64/nagios/plugins/$item mode=0755
+  with_items:
+  - check_postfix_queue
+  - check_raid.py
+
 # create dirs
 # puppet used to make /var/spool/nagios (owned by nagios.nagios) mode 750
 # and /usr/lib/nagios/plugins (owned by root) mode 755 - but we don't know WHY
@@ -35,9 +41,21 @@
   tags:
   - config
 
+- name: install nrpe client configs
+  copy: src=$files/nagios/client/$item dest=/etc/nrpe.d/$item
+  with_items:
+  - check_mirrorlist_cache.cfg
+  - check_raid.cfg
+  - check_cron.cfg
+  - check_disk.cfg
+  - check_swap.cfg
+  - check_postfix_queue.cfg
+  notify:
+  - restart nrpe
+  tags:
+  - config
+
 - name: nrpe service start
   service: name=nrpe state=running enabled=true
   tags:
   - service
-
-