diff --git a/roles/nagios_client/files/scripts/check_memcache_connect b/roles/nagios_client/files/scripts/check_memcache_connect
new file mode 100644
index 0000000000..7c472e3ec8
--- /dev/null
+++ b/roles/nagios_client/files/scripts/check_memcache_connect
@@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# 2014-12-19
+# Author: Ralph Bean
+#
+# NRPE check: send "stats" to the memcached instance on 127.0.0.1:11211 and
+# report CRIT if the exchange fails or does not finish in time.
+
+# exit codes (standard Nagios plugin set; warn/unkn are currently unused)
+ok=0
+warn=1
+crit=2
+unkn=3
+
+# Upper bound, in seconds, on the whole exchange so that a wedged daemon
+# yields an explicit CRIT (timeout exits 124) instead of a hung check.
+limit=10
+
+# Right now we just check to see if we can even run this command without
+# hanging and timing out. In the future, we could parse stdout for more
+# fine-grained information.
+echo stats | timeout "$limit" nc 127.0.0.1 11211 > /dev/null
+status=$?
+
+if [ "$status" -ne 0 ]; then
+    echo "CRIT: stats command got status code $status"
+    exit "$crit"
+else
+    echo "OK: stats command got status code $status"
+    exit "$ok"
+fi
diff --git a/roles/nagios_client/tasks/main.yml b/roles/nagios_client/tasks/main.yml
index 6c91ddab69..aa9b6c2223 100644
--- a/roles/nagios_client/tasks/main.yml
+++ b/roles/nagios_client/tasks/main.yml
@@ -31,6 +31,7 @@
   - check_fedmsg_producers_consumers.py
   - check_supybot_plugin
   - check_datanommer_timesince.py
+  - check_memcache_connect
   when: not inventory_hostname.startswith('noc')
   tags:
   - nagios_client
diff --git a/roles/nagios_client/templates/check_memcache.cfg.j2 b/roles/nagios_client/templates/check_memcache.cfg.j2
index b350a654e3..b0ec100a5d 100644
--- a/roles/nagios_client/templates/check_memcache.cfg.j2
+++ b/roles/nagios_client/templates/check_memcache.cfg.j2
@@ -1,2 +1,2 @@
 command[check_memcache]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -a '/usr/bin/memcached' -u memcached
-
+command[check_memcache_connect]=/usr/lib64/nagios/plugins/check_memcache_connect
diff --git a/roles/nagios_server/files/nagios/services/memcached.cfg b/roles/nagios_server/files/nagios/services/memcached.cfg
index 9f497b50c6..814a5a8530 100644
--- a/roles/nagios_server/files/nagios/services/memcached.cfg
+++ b/roles/nagios_server/files/nagios/services/memcached.cfg
@@ -1,12 +1,24 @@
 define service {
   host_name             memcached01
-  service_description   Check memcached daemon
+  service_description   Check for the presence of the memcached daemon
   check_command         check_by_nrpe!check_memcache
   use                   defaulttemplate
 }
 define service {
   host_name             memcached02
-  service_description   Check memcached daemon
+  service_description   Check for the presence of the memcached daemon
   check_command         check_by_nrpe!check_memcache
   use                   defaulttemplate
 }
+define service {
+  host_name             memcached01
+  service_description   Check for connectivity to the memcached daemon
+  check_command         check_by_nrpe!check_memcache_connect
+  use                   defaulttemplate
+}
+define service {
+  host_name             memcached02
+  service_description   Check for connectivity to the memcached daemon
+  check_command         check_by_nrpe!check_memcache_connect
+  use                   defaulttemplate
+}
diff --git a/roles/nagios_server/files/nrpe.cfg b/roles/nagios_server/files/nrpe.cfg
index 86af64b5da..4fb1cdb424 100644
--- a/roles/nagios_server/files/nrpe.cfg
+++ b/roles/nagios_server/files/nrpe.cfg
@@ -238,6 +238,7 @@ command[check_fcomm_queue]=/usr/lib64/nagios/plugins/check_fcomm_queue
 command[check_redis_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'redis-server' -u redis
 command[check_openvpn_link]=/usr/lib64/nagios/plugins/check_ping -H 192.168.1.58 -w 375.0,20% -c 500,60%
 command[check_memcache]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -a '/usr/bin/memcached' -u memcached
+command[check_memcache_connect]=/usr/lib64/nagios/plugins/check_memcache_connect
 
 # The following are fedmsg/datanommer checks to be run on busgateway01.
 # They check for the time since the latest message in any particular category.