Make a zombie-infested group and set nrpe zombie check limits super high in that group so we don't have to hear about the latest outbreak.
This commit is contained in:
parent
23d3321a49
commit
0c2d5a7444
2 changed files with 18 additions and 0 deletions
|
@ -1180,3 +1180,16 @@ osbs-master01.stg.phx2.fedoraproject.org
|
|||
|
||||
[docker-registry-stg]
|
||||
docker-registry01.stg.phx2.fedoraproject.org
|
||||
|
||||
#
|
||||
# Hosts in this group have zombie processes for various reasons
|
||||
# and we do not want to alert on those, so the client nrpe.conf uses
|
||||
# this group to denote those.
|
||||
#
|
||||
[zombie-infested]
|
||||
# anon git via systemd socket seems to get zombies from time to time
|
||||
pkgs02.phx2.fedoraproject.org
|
||||
# the openstack 5.0 vnc console viewer causes bunches of Zombies
|
||||
fed-cloud09.cloud.fedoraproject.org
|
||||
# Ansible from time to time in large runs has zombie threads
|
||||
batcave01.phx2.fedoraproject.org
|
||||
|
|
|
@ -201,7 +201,12 @@ include_dir=/etc/nrpe.d/
|
|||
command[check_users]={{ libdir }}/nagios/plugins/check_users -w 5 -c 10
|
||||
command[check_load]={{ libdir }}/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
|
||||
command[check_hda1]={{ libdir }}/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
|
||||
{% if inventory_hostname not in groups['zombie-infested'] %}
|
||||
command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 5 -c 10 -s Z
|
||||
{% else %}
|
||||
# This host is prone to Zombies and we do not care or want to alert on it so we make the limits very high
|
||||
command[check_zombie_procs]={{ libdir }}/nagios/plugins/check_procs -w 50000 -c 100000 -s Z
|
||||
{% endif %}
|
||||
command[check_total_procs]={{ libdir }}/nagios/plugins/check_procs -w {{ nrpe_procs_warn }} -c {{ nrpe_procs_crit }}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue