try and get nagios working on noc01.iad2

This commit is contained in:
Stephen Smoogen 2020-06-08 11:17:14 -04:00
parent ca8a5edc08
commit e26ead0f70
18 changed files with 886 additions and 6 deletions

View file

@ -1,19 +0,0 @@
#!/bin/bash
# Nagios check: count the failed builds shown on the Koji builds page.
# Exit status follows the Nagios plugin convention:
#   0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.
WARNING=20
CRITICAL=25

# Fetch the page on its own and test wget's status: a failed download must not
# be piped into grep, which would count 0 matches and make the check report OK.
if ! PAGE=$(/usr/bin/wget --timeout=60 -q -O- http://koji.phx2.fedoraproject.org/koji/builds)
then
    echo "Koji: UNKNOWN could not fetch builds page"
    exit 3
fi

# grep -c counts matching lines; it prints 0 (and exits 1) when nothing matches,
# which is a valid result here, so its exit status is deliberately ignored.
FAILURES=$(printf '%s\n' "$PAGE" | /bin/grep -c failed.png)

# Quote the operands so an unexpected empty value is a clean test error
# rather than a malformed '[' expression.
if [ "$FAILURES" -gt "$CRITICAL" ]
then
    echo "Koji: CRITICAL failed builds: $FAILURES"
    exit 2
elif [ "$FAILURES" -gt "$WARNING" ]
then
    echo "Koji: WARNING failed builds: $FAILURES"
    exit 1
else
    echo "Koji: OK failed builds: $FAILURES"
    exit 0
fi

View file

@ -0,0 +1,27 @@
# Basset checks: every service below applies to the "basset" hostgroup and is
# dispatched through the check_by_nrpe command.

# Process checks.
define service {
    hostgroup_name          basset
    service_description     mongo process
    check_command           check_by_nrpe!check_mongo_proc
    use                     defaulttemplate
}
define service {
    hostgroup_name          basset
    service_description     rabbitmq process
    check_command           check_by_nrpe!check_rabbitmq_proc
    use                     defaulttemplate
}
define service {
    hostgroup_name          basset
    service_description     basset worker processes
    check_command           check_by_nrpe!check_worker_proc
    use                     defaulttemplate
}

# Queue check.
define service {
    hostgroup_name          basset
    service_description     basset processing queue
    check_command           check_by_nrpe!check_basset_queue
    use                     defaulttemplate
}

View file

@ -0,0 +1,6 @@
# HTTP check for certgetter01; the host's own FQDN is passed as the check_http argument.
define service {
    host_name               certgetter01.iad2.fedoraproject.org
    service_description     certgetter-http
    check_command           check_http!certgetter01.iad2.fedoraproject.org
    use                     defaulttemplate
}

View file

@ -0,0 +1,6 @@
# MySQL backup check on db03, dispatched through check_by_nrpe.
define service {
    host_name               db03.iad2.fedoraproject.org
    service_description     Check MySQL Backup
    check_command           check_by_nrpe!check_mysql_backup
    use                     defaulttemplate
}

View file

@ -0,0 +1,76 @@
# Disk checks. All are dispatched through check_by_nrpe; most inherit from
# disktemplate, with a few hostgroups using their own template.

# Every host in "all" except members of mincheckgrp.
# NOTE(review): this description uses underscores ("Disk_Space_/") while the
# others use spaces; presumably to stay distinct from the "Disk space /"
# services defined for koji/retrace below. Confirm before renaming.
define service {
    hostgroup_name          all, !mincheckgrp
    service_description     Disk_Space_/
    check_command           check_by_nrpe!check_disk_/
    use                     disktemplate
}
define service {
    hostgroup_name          all, !mincheckgrp
    service_description     Disk Space /boot
    check_command           check_by_nrpe!check_disk_/boot
    use                     disktemplate
}

# Hostgroup- and host-specific mount points.
define service {
    hostgroup_name          qahardware
    service_description     Disk Space /srv
    check_command           check_by_nrpe!check_disk_/srv
    use                     disktemplate
}
define service {
    host_name               log01.iad2.fedoraproject.org
    service_description     Disk space /var/log
    check_command           check_by_nrpe!check_disk_/var/log
    use                     disktemplate
}
define service {
    hostgroup_name          pkgs
    service_description     Check read-only filesystem
    check_command           check_by_nrpe!check_readonly_fs
    use                     disktemplate
}
define service {
    hostgroup_name          pkgs
    service_description     Disk space /srv/cache/lookaside
    check_command           check_by_nrpe!check_disk_/srv/cache/lookaside
    use                     disktemplate
}

# Root filesystem checks that use a non-default template.
define service {
    hostgroup_name          koji
    service_description     Disk space /
    check_command           check_by_nrpe!check_disk_/
    use                     ppc-secondarytemplate
}
define service {
    hostgroup_name          retrace
    service_description     Disk space /
    check_command           check_by_nrpe!check_disk_/
    use                     retracetemplate
}
define service {
    hostgroup_name          retrace
    service_description     Disk Space for huge /srv
    check_command           check_by_nrpe!check_disk_huge_/srv
    use                     disktemplate
}

# NOTE(review): check_disk_/project/ is the only command name here with a
# trailing slash; verify it matches the NRPE command defined on the hosts.
define service {
    hostgroup_name          people
    service_description     Disk space /project
    check_command           check_by_nrpe!check_disk_/project/
    use                     disktemplate
}
define service {
    hostgroup_name          oci_registry
    service_description     Disk space /srv/registry
    check_command           check_by_nrpe!check_disk_/srv/registry
    use                     disktemplate
}

View file

@ -0,0 +1,487 @@
## There are lots of different sections in this now-enormous file
## Each one starts with a 'BEGIN' comment.
# BEGIN, check for the existence of processes
# fedmsg-irc process on value01.
define service {
    host_name               value01.iad2.fedoraproject.org
    service_description     Check for fedmsg-irc proc
    check_command           check_by_nrpe!check_fedmsg_irc_proc
    use                     defaulttemplate
}

# fedmsg-gateway checks applied to the whole "proxies" hostgroup.
define service {
    hostgroup_name          proxies
    service_description     Check fedmsg-gateway consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_busgateway_gateway
    use                     defaulttemplate
}
define service {
    hostgroup_name          proxies
    service_description     Check for existence fedmsg-gateway proc
    check_command           check_by_nrpe!check_fedmsg_gateway_proc
    use                     defaulttemplate
}
define service {
    hostgroup_name          proxies
    service_description     Check fedmsg consumers and producers gateway
    check_command           check_by_nrpe!check_fedmsg_cp_busgateway_gateway
    use                     defaulttemplate
}
define service {
    hostgroup_name          proxies
    service_description     Check fedmsg-gateway consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_busgateway_gateway
    use                     defaulttemplate
}

# fedmsg-hub process: one definition, expanded by Nagios into one service per
# listed host (same description, command and template on each).
define service {
    host_name               busgateway01.iad2.fedoraproject.org, badges-backend01.iad2.fedoraproject.org, pkgs02.iad2.fedoraproject.org, fedimg01.iad2.fedoraproject.org, packages03.iad2.fedoraproject.org, pdc-backend01.iad2.fedoraproject.org
    service_description     Check for fedmsg-hub proc
    check_command           check_by_nrpe!check_fedmsg_hub_proc
    use                     defaulttemplate
}

# Remaining fedmsg processes on busgateway01.
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check for fedmsg-gateway proc
    check_command           check_by_nrpe!check_fedmsg_gateway_proc
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check for fedmsg-relay proc
    check_command           check_by_nrpe!check_fedmsg_relay_proc
    use                     defaulttemplate
}

# Odd one out: checks the supybot fedmsg plugin rather than a fedmsg process.
define service {
    host_name               value01.iad2.fedoraproject.org
    service_description     Check supybot fedmsg plugin
    check_command           check_by_nrpe!check_supybot_fedmsg_plugin
    use                     defaulttemplate
}
# BEGIN, check datanommer history
# One service per message category, all defined on busgateway01 and dispatched
# through check_by_nrpe with a category-specific check_datanommer_* command.
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent buildsys/koji messages
    check_command           check_by_nrpe!check_datanommer_buildsys
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent git messages
    check_command           check_by_nrpe!check_datanommer_git
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent bodhi compose messages
    check_command           check_by_nrpe!check_datanommer_bodhi_composes
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent bodhi messages
    check_command           check_by_nrpe!check_datanommer_bodhi
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent wiki messages
    check_command           check_by_nrpe!check_datanommer_wiki
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent compose messages
    check_command           check_by_nrpe!check_datanommer_compose
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent meetbot messages
    check_command           check_by_nrpe!check_datanommer_meetbot
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent fas messages
    check_command           check_by_nrpe!check_datanommer_fas
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent fedoraplanet messages
    check_command           check_by_nrpe!check_datanommer_planet
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent copr finished build messages
    check_command           check_by_nrpe!check_datanommer_copr
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent fedbadges messages
    check_command           check_by_nrpe!check_datanommer_fedbadges
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent fedocal messages
    check_command           check_by_nrpe!check_datanommer_fedocal
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent ansible messages
    check_command           check_by_nrpe!check_datanommer_ansible
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent fedimg messages
    check_command           check_by_nrpe!check_datanommer_fedimg
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent hotness messages
    check_command           check_by_nrpe!check_datanommer_hotness
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent faf messages
    check_command           check_by_nrpe!check_datanommer_faf
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent mailman messages
    check_command           check_by_nrpe!check_datanommer_mailman
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent bugzilla messages
    check_command           check_by_nrpe!check_datanommer_bugzilla
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent github messages
    check_command           check_by_nrpe!check_datanommer_github
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent kerneltest messages
    check_command           check_by_nrpe!check_datanommer_kerneltest
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent fmn messages
    check_command           check_by_nrpe!check_datanommer_fmn
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent rpm sign messages
    check_command           check_by_nrpe!check_datanommer_rpmsign
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent mdapi messages
    check_command           check_by_nrpe!check_datanommer_mdapi
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent greenwave messages
    check_command           check_by_nrpe!check_datanommer_greenwave
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check datanommer for recent resultsdb messages
    check_command           check_by_nrpe!check_datanommer_resultsdb
    use                     defaulttemplate
}
# BEGIN, check consumers and producers
# Each host gets its own check_fedmsg_cp_* command, so these definitions
# cannot share a host_name list.

# busgateway01: hub, relay and gateway.
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers hub
    check_command           check_by_nrpe!check_fedmsg_cp_busgateway_hub
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers relay
    check_command           check_by_nrpe!check_fedmsg_cp_busgateway_relay
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers gateway
    check_command           check_by_nrpe!check_fedmsg_cp_busgateway_gateway
    use                     defaulttemplate
}

# value01: irc.
define service {
    host_name               value01.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers irc
    check_command           check_by_nrpe!check_fedmsg_cp_value
    use                     defaulttemplate
}

# Backend hosts: hub.
define service {
    host_name               badges-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers hub
    check_command           check_by_nrpe!check_fedmsg_cp_badges_backend
    use                     defaulttemplate
}
define service {
    host_name               bugzilla2fedmsg01.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers hub
    check_command           check_by_nrpe!check_fedmsg_cp_bugzilla2fedmsg
    use                     defaulttemplate
}
define service {
    host_name               fedimg01.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers hub
    check_command           check_by_nrpe!check_fedmsg_cp_fedimg_backend
    use                     defaulttemplate
}
define service {
    host_name               packages03.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers hub
    check_command           check_by_nrpe!check_fedmsg_cp_packages_backend
    use                     defaulttemplate
}
define service {
    host_name               pdc-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers hub
    check_command           check_by_nrpe!check_fedmsg_cp_pdc_backend
    use                     defaulttemplate
}
define service {
    host_name               mbs-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg consumers and producers hub
    check_command           check_by_nrpe!check_fedmsg_cp_mbs_backend
    use                     defaulttemplate
}
# BEGIN exceptions counter
# Each host gets its own check_fedmsg_cexceptions_* command.

# busgateway01: hub, relay and gateway.
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_busgateway_hub
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check fedmsg-relay consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_busgateway_relay
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check fedmsg-gateway consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_busgateway_gateway
    use                     defaulttemplate
}

# value01: irc.
define service {
    host_name               value01.iad2.fedoraproject.org
    service_description     Check fedmsg-irc consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_value
    use                     defaulttemplate
}

# Backend hosts: hub.
define service {
    host_name               badges-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_badges_backend
    use                     defaulttemplate
}
define service {
    host_name               notifs-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_notifs_backend
    use                     defaulttemplate
}
define service {
    host_name               bugzilla2fedmsg01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_bugzilla2fedmsg
    use                     defaulttemplate
}
define service {
    host_name               fedimg01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_fedimg_backend
    use                     defaulttemplate
}
define service {
    host_name               packages03.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_packages_backend
    use                     defaulttemplate
}
define service {
    host_name               pdc-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_pdc_backend
    use                     defaulttemplate
}
define service {
    host_name               mbs-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers exceptions
    check_command           check_by_nrpe!check_fedmsg_cexceptions_mbs_backend
    use                     defaulttemplate
}
# BEGIN backlog checking
# Each host gets its own check_fedmsg_cbacklog_* command.

# busgateway01: hub, relay and gateway.
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_busgateway_hub
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check fedmsg-relay consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_busgateway_relay
    use                     defaulttemplate
}
define service {
    host_name               busgateway01.iad2.fedoraproject.org
    service_description     Check fedmsg-gateway consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_busgateway_gateway
    use                     defaulttemplate
}

# value01: irc.
define service {
    host_name               value01.iad2.fedoraproject.org
    service_description     Check fedmsg-irc consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_value
    use                     defaulttemplate
}

# Backend hosts: hub.
define service {
    host_name               badges-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_badges_backend
    use                     defaulttemplate
}
define service {
    host_name               notifs-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_notifs_backend
    use                     defaulttemplate
}
define service {
    host_name               bugzilla2fedmsg01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_bugzilla2fedmsg
    use                     defaulttemplate
}
define service {
    host_name               fedimg01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_fedimg_backend
    use                     defaulttemplate
}
define service {
    host_name               packages03.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_packages_backend
    use                     defaulttemplate
}
define service {
    host_name               pdc-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_pdc_backend
    use                     defaulttemplate
}
define service {
    host_name               mbs-backend01.iad2.fedoraproject.org
    service_description     Check fedmsg-hub consumers backlog
    check_command           check_by_nrpe!check_fedmsg_cbacklog_mbs_backend
    use                     defaulttemplate
}

View file

@ -0,0 +1,45 @@
# File-age checks. Each overrides the template's scheduling with
# check_interval 120 and notification_interval 130.

# Checks applied to the "proxies" hostgroup.
define service {
    hostgroup_name          proxies
    service_description     Check MirrorList 1 Cache
    check_command           check_by_nrpe!check_mirrorlist1_cache
    use                     defaulttemplate
    check_interval          120
    notification_interval   130
}
define service {
    hostgroup_name          proxies
    service_description     Check MirrorList 2 Cache
    check_command           check_by_nrpe!check_mirrorlist2_cache
    use                     defaulttemplate
    check_interval          120
    notification_interval   130
}
define service {
    hostgroup_name          proxies
    service_description     Check TicketKey age
    check_command           check_by_nrpe!check_ticketkey_age
    use                     defaulttemplate
    check_interval          120
    notification_interval   130
}
define service {
    hostgroup_name          proxies
    service_description     Check ostree summary age
    check_command           check_by_nrpe!check_ostree_summary_file_age
    use                     defaulttemplate
    check_interval          120
    notification_interval   130
}

# Merged log on log01; this is the only check here with an event handler
# (restart_rsyslog).
define service {
    host_name               log01.iad2.fedoraproject.org
    service_description     Check Merged Log
    check_command           check_by_nrpe!check_merged_file_age
    use                     defaulttemplate
    check_interval          120
    notification_interval   130
    event_handler           restart_rsyslog
}

View file

@ -0,0 +1,20 @@
# FMN queue-size checks on notifs-backend01: irc backend, email backend, worker.
define service {
    host_name               notifs-backend01.iad2.fedoraproject.org
    service_description     Check backend irc queue size
    check_command           check_by_nrpe!check_fmn_backend_irc_queue
    use                     defaulttemplate
}
define service {
    host_name               notifs-backend01.iad2.fedoraproject.org
    service_description     Check backend email queue size
    check_command           check_by_nrpe!check_fmn_backend_email_queue
    use                     defaulttemplate
}
define service {
    host_name               notifs-backend01.iad2.fedoraproject.org
    service_description     Check worker queue size
    check_command           check_by_nrpe!check_fmn_worker_queue
    use                     defaulttemplate
}

View file

@ -0,0 +1,16 @@
# Koji checks on koji01. Both inherit criticaltemplate and set max_check_attempts to 5.
define service {
    host_name               koji01.iad2.fedoraproject.org
    service_description     Check Koji
    check_command           check_koji
    max_check_attempts      5
    use                     criticaltemplate
}

# Wellness check; the command receives two arguments: koji.fedoraproject.org and koji.
# NOTE(review): the command name includes a ".py" suffix; confirm a command
# object with exactly this name is defined.
define service {
    host_name               koji01.iad2.fedoraproject.org
    service_description     Check Koji wellness
    check_command           check_koji_wellness.py!koji.fedoraproject.org!koji
    max_check_attempts      5
    use                     criticaltemplate
}

View file

@ -0,0 +1,13 @@
# NFS file-lock check, using criticaltemplate.
# NOTE(review): koji01.phx2 is a phx2 host listed alongside an iad2 host;
# confirm it is still meant to be monitored from here.
define service {
    host_name               rawhide-composer.iad2.fedoraproject.org, koji01.phx2.fedoraproject.org
    service_description     Check NFS File Locks
    check_command           check_by_nrpe!check_lock
    use                     criticaltemplate
}

# Certificate lock-file age check on fas01.
define service {
    host_name               fas01.iad2.fedoraproject.org
    service_description     Check certificate lock
    check_command           check_by_nrpe!check_lock_file_age
    use                     defaulttemplate
}

View file

@ -0,0 +1,7 @@
# Mailman API check on mailman01, with max_check_attempts set to 5.
define service {
    host_name               mailman01.iad2.fedoraproject.org
    service_description     check mailman api
    check_command           check_by_nrpe!check_mailman_api
    max_check_attempts      5
    use                     defaulttemplate
}

View file

@ -0,0 +1,8 @@
# NRPE test on an explicit host list, with max_check_attempts 2 and check_interval 2.
# NOTE(review): all hosts but the first are phx2 hosts; confirm they are
# still meant to be monitored from here.
define service {
    host_name               bastion02.iad2.fedoraproject.org, bastion01.phx2.fedoraproject.org, sundries01.phx2.fedoraproject.org, sundries02.phx2.fedoraproject.org, wiki01.phx2.fedoraproject.org, wiki02.phx2.fedoraproject.org
    service_description     nrpe
    check_command           test_nrpe
    max_check_attempts      2
    check_interval          2
    use                     defaulttemplate
}

View file

@ -0,0 +1,7 @@
# OSBS API endpoint check on osbs-master01, with max_check_attempts set to 5.
define service {
    host_name               osbs-master01.iad2.fedoraproject.org
    service_description     Check OSBS API endpoint paths
    check_command           check_by_nrpe!check_osbs_api
    max_check_attempts      5
    use                     defaulttemplate
}

View file

@ -0,0 +1,14 @@
# PostgreSQL checks; the check_pgsql argument is "koji" and "fas2" respectively.
# Both inherit criticaltemplate.
define service {
    host_name               db-koji01.iad2.fedoraproject.org
    service_description     Check Koji DB
    check_command           check_pgsql!koji
    use                     criticaltemplate
}

# The FAS DB check is also placed in the "fas" servicegroup.
define service {
    host_name               db-fas01.iad2.fedoraproject.org
    service_description     Check FAS DB
    check_command           check_pgsql!fas2
    use                     criticaltemplate
    servicegroups           fas
}

View file

@ -0,0 +1,96 @@
# RabbitMQ processes (for each host): one definition, expanded by Nagios into
# one service per listed cluster member.
define service {
    host_name               rabbitmq01.iad2.fedoraproject.org, rabbitmq02.iad2.fedoraproject.org, rabbitmq03.iad2.fedoraproject.org
    service_description     Check bus server processes
    check_command           check_by_nrpe!check_rabbitmq_server
    use                     defaulttemplate
}

# RabbitMQ alarms (for each host), defined the same way.
define service {
    host_name               rabbitmq01.iad2.fedoraproject.org, rabbitmq02.iad2.fedoraproject.org, rabbitmq03.iad2.fedoraproject.org
    service_description     Check bus server alarms
    check_command           check_by_nrpe!check_rabbitmq_watermark
    use                     defaulttemplate
}

# The remaining checks report cluster-wide results, so they are defined on
# rabbitmq01 only rather than on every cluster member.
define service {
    host_name               rabbitmq01.iad2.fedoraproject.org
    service_description     Check bus cluster
    check_command           check_by_nrpe!check_rabbitmq_cluster
    use                     defaulttemplate
}
define service {
    host_name               rabbitmq01.iad2.fedoraproject.org
    service_description     Check bus cluster connections
    check_command           check_by_nrpe!check_rabbitmq_connections
    use                     defaulttemplate
}
define service {
    host_name               rabbitmq01.iad2.fedoraproject.org
    service_description     Check bus cluster overview
    check_command           check_by_nrpe!check_rabbitmq_overview
    use                     defaulttemplate
}
define service {
    host_name               rabbitmq01.iad2.fedoraproject.org
    service_description     Check bus exchanges in /pubsub
    check_command           check_by_nrpe!check_rabbitmq_exchange_pubsub
    use                     defaulttemplate
}
define service {
    host_name               rabbitmq01.iad2.fedoraproject.org
    service_description     Check bus exchanges in /public_pubsub
    check_command           check_by_nrpe!check_rabbitmq_exchange_public_pubsub
    use                     defaulttemplate
}
define service {
    host_name               rabbitmq01.iad2.fedoraproject.org
    service_description     Check bus queues in /pubsub
    check_command           check_by_nrpe!check_rabbitmq_queue_pubsub
    use                     defaulttemplate
}
define service {
    host_name               rabbitmq01.iad2.fedoraproject.org
    service_description     Check bus queues in /public_pubsub
    check_command           check_by_nrpe!check_rabbitmq_queue_public_pubsub
    use                     defaulttemplate
}