Make a zombie-infested group and set nrpe zombie check limits super high in that group so we don't have to hear about the latest outbreak.

This commit is contained in:
Kevin Fenzi 2016-05-27 17:49:51 +00:00
parent 23d3321a49
commit 0c2d5a7444
2 changed files with 18 additions and 0 deletions

View file

@ -1180,3 +1180,16 @@ osbs-master01.stg.phx2.fedoraproject.org
[docker-registry-stg]
docker-registry01.stg.phx2.fedoraproject.org
#
# Hosts in this group accumulate zombie processes for various reasons,
# and we do not want to be alerted about them. The client nrpe
# configuration template checks membership in this group and, for
# members, raises the zombie-process check thresholds.
#
[zombie-infested]
# Anonymous git served via a systemd socket leaves zombies from time to time.
pkgs02.phx2.fedoraproject.org
# The OpenStack 5.0 VNC console viewer causes bunches of zombies.
fed-cloud09.cloud.fedoraproject.org
# Large Ansible runs leave zombie threads behind from time to time.
batcave01.phx2.fedoraproject.org

View file

@ -201,7 +201,12 @@ include_dir=/etc/nrpe.d/
command[check_users]={{ libdir }}/nagios/plugins/check_users -w 5 -c 10
command[check_load]={{ libdir }}/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
{# Hosts listed in the 'zombie-infested' inventory group get very high zombie-process thresholds so the check effectively never alerts; every other host keeps the normal limits (warn at 5, critical at 10). #}
{% if inventory_hostname not in groups['zombie-infested'] %}
command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 5 -c 10 -s Z
{% else %}
# This host is prone to zombies and we do not want to alert on them, so the limits are set very high.
command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 50000 -c 100000 -s Z
{% endif %}
command[check_total_procs]={{ libdir }}/nagios/plugins/check_procs -w {{ nrpe_procs_warn }} -c {{ nrpe_procs_crit }}