set up things so nagios in iad2 is mostly ready.

This commit is contained in:
Stephen Smoogen 2020-05-21 19:20:38 -04:00
parent 419140e4c5
commit 192637532c
32 changed files with 117 additions and 156 deletions

View file

@ -24,4 +24,4 @@ custom_rules: [
]
nagios_srcdir: 'nagios'
nagios_location: 'internal'
nagios_location: 'iad2_internal'

View file

@ -33,4 +33,4 @@ csi_relationship: |
- if this host is down, dhcp/bootp leases/renew will fail. pxe booting will fail as well
nagios_srcdir: 'nagios'
nagios_location: 'internal'
nagios_location: 'phx2_internal'

View file

@ -31,25 +31,6 @@ define command{
command_line $USER1$/check_dummy $ARG1$ $ARG2$
}
# 'check_tape'
define command{
command_name check_tape
command_line $USER1$/check_tape
}
# 'check_ftp' command definition
define command{
command_name check_ftp
command_line $USER1$/check_ftp -H $HOSTADDRESS$
}
# 'check_hpjd' command definition
define command{
command_name check_hpjd
command_line $USER1$/check_hpjd -H $HOSTADDRESS$ -C public
}
# 'check_snmp' command definition
define command{
command_name check_snmp
@ -76,21 +57,3 @@ define command{
command_line $USER1$/check_dhcp $ARG1$
}
# 'check_pop' command definition
define command{
command_name check_pop
command_line $USER1$/check_pop -H $HOSTADDRESS$
}
# 'check_imap' command definition
define command{
command_name check_imap
command_line $USER1$/check_imap -H $HOSTADDRESS$ $ARG1$
}
# 'check_nt' command definition
define command{
command_name check_nt
command_line $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$
}

View file

@ -1,17 +0,0 @@
#!/bin/bash
# Nagios plugin: report the "global status" value read over SNMP from tape01.
#
# Exit codes follow the Nagios plugin convention:
#   0 OK        - status value below the WARNING threshold
#   1 WARNING   - status value equal to the WARNING threshold
#   2 CRITICAL  - status value above the WARNING threshold
#   3 UNKNOWN   - no single numeric status value could be read
#
# NOTE(review): the meaning of the individual status values is defined by the
# device MIB behind the OID below; confirm the threshold against that MIB.

# Fourth whitespace-separated field of the snmpwalk output is the status value.
CODE=$(snmpwalk -v 1 -c public tape01.phx2.fedoraproject.org 1.3.6.1.4.1.674.10893.2.102.2.1 | awk '{print $4}')
WARNING=4

# An empty, multi-line or non-numeric result (timeout, unreachable host, changed
# output format) would make the numeric tests below error out and fall through
# to the OK branch, so report UNKNOWN instead of a false OK.
if ! [[ "$CODE" =~ ^[0-9]+$ ]]
then
    echo "Tape: UNKNOWN could not read global status: '$CODE'"
    exit 3
fi

if [ "$CODE" -gt "$WARNING" ]
then
    echo "Tape: CRITICAL global status: $CODE"
    exit 2
elif [ "$CODE" -eq "$WARNING" ]
then
    echo "Tape: WARNING global status: $CODE"
    exit 1
else
    echo "Tape: OK global status: $CODE"
    exit 0
fi

View file

@ -103,17 +103,8 @@
tags:
- nagios_server
## Copy over system configs
- name: Copy specialized nrpe.cfg for nagios server
copy: src=nrpe/nrpe.cfg dest=/etc/nagios/nrpe.cfg mode=0644 group=root owner=root
notify:
- restart nrpe
tags:
- config
- nagios_server
## Copy over the nagios configs
- name: Copy /etc/nagios/conf.d/*cfg files
- name: Copy /etc/nagios/conf.d/cfg files
copy: src=nagios/configs/{{ item }} dest=/etc/nagios/conf.d/{{ item }}
with_items:
- escalations.cfg
@ -182,31 +173,42 @@
notify: restart nagios
## Copy over the services
- name: Copy /etc/nagios/services (PHX2)
copy: src=nagios/services/{{ item }} dest=/etc/nagios/services/{{ item }}
- name: Copy /etc/nagios/services (PHX2 specific files)
copy: src=nagios/services/phx2/{{ item }} dest=/etc/nagios/services/{{ item }}
with_items:
- basset.cfg
- certgetter.cfg
- db_backups.cfg
- disk.cfg
- dns.cfg
- fedmsg.cfg
- file_age.cfg
- fmn.cfg
- haproxy.cfg
- koji.cfg
- koschei.cfg
- locking.cfg
- mail_queue.cfg
- mailman.cfg
- memcached.cfg
- nagios.cfg
- nrpe.cfg
- osbs.cfg
- pgsql.cfg
- rabbitmq.cfg
tags:
- nagios_config
- nagios_server
- nagios_hostgroups
when: env == "production" and nagios_location == 'phx2_internal'
notify: restart nagios
- name: Copy /etc/nagios/services (internal files)
copy: src=nagios/services/{{ item }} dest=/etc/nagios/services/{{ item }}
with_items:
- dns.cfg
- haproxy.cfg
- mail_queue.cfg
- memcached.cfg
- nagios.cfg
- pagure_redis.cfg
- ping.cfg
- pgsql.cfg
- procs.cfg
- rabbitmq.cfg
- raid.cfg
- smtp-mm.cfg
- ssh.cfg
@ -215,12 +217,11 @@
- templates.cfg
- unbound.cfg
- vpnclients.cfg
- certgetter.cfg
tags:
- nagios_config
- nagios_server
- nagios_hostgroups
when: env == "production" and nagios_location == "internal"
when: env == "production" and nagios_location == 'phx2_internal'
notify: restart nagios
## Copy over the services
@ -236,26 +237,28 @@
when: env == "production" and nagios_location == "external"
notify: restart nagios
## Copy over the services
- name: Copy /etc/nagios/services (staging)
copy: src=nagios/services/{{ item }} dest=/etc/nagios/services/{{ item }}
with_items:
- mail_queue.cfg
- ping.cfg
- templates.cfg
tags:
- nagios_config
- nagios_server
when: env == "staging"
notify: restart nagios
## Copy over the servicegroups
- name: Copy /etc/nagios/servicegroups
synchronize: src=nagios/servicegroups/ dest=/etc/nagios/servicegroups/
tags:
- nagios_config
- nagios_server
when: nagios_location == "internal"
when: nagios_location == 'phx2_internal'
notify: restart nagios
## Copy over the servicegroups
- name: Copy /etc/nagios/servicegroups
copy: src=nagios/servicegroups/{{item}} dest=/etc/nagios/servicegroups/{{item}}
with_items:
- bodhi.cfg
- fas.cfg
- fp-wiki.cfg
- freemedia.cfg
- mirrorlist.cfg
tags:
- nagios_config
- nagios_server
when: nagios_location == "external"
notify: restart nagios
## Copy over the plugins
@ -283,6 +286,18 @@
## Build template files
# This one may go to being just a regular config file if we can make remote monitoring work
## Copy over system configs
# Render the nagios server's own nrpe.cfg from a template and restart nrpe
# when the rendered file changes.
- name: Copy specialized nrpe.cfg for nagios server
  template: src=nrpe/nrpe.cfg.j2 dest=/etc/nagios/nrpe.cfg
  notify:
  - restart nrpe
  tags:
  - config
  - nagios_server

# Enforce ownership and mode on the file rendered by the task above. The path
# must match that task's dest (/etc/nagios/nrpe.cfg); the tags match as well so
# a tag-limited run applies both steps together.
- name: Configure perms on nrpe correctly
  file: dest=/etc/nagios/nrpe.cfg mode=0644 group=root owner=root
  tags:
  - config
  - nagios_server
- name: Template out the nagios httpd conf
template: src=httpd/{{item}}.j2 dest=/etc/httpd/conf.d/{{item}}
with_items:
@ -328,7 +343,7 @@
- ipa.cfg
- phx2-mgmt.cfg
- mirrorlist-proxies.cfg
when: env == "production" and nagios_location == "internal"
when: env == "production" and nagios_location == 'phx2_internal'
tags:
- nagios_server
- nagios_config
@ -345,8 +360,6 @@
- websites
notify: restart nagios
- name: Build out nagios host templates (production)
template: src=nagios/hosts/{{item}}.j2 dest=/etc/nagios/hosts/{{item}} mode=0644 owner=root group=root
with_items:
@ -375,7 +388,7 @@
- phx2-hosts.cfg
- phx2-mgmt-hosts.cfg
- staging-hosts.cfg
when: env == "production" and nagios_location == "internal"
when: env == "production" and nagios_location == 'phx2_internal'
tags:
- nagios_server
- nagios_config
@ -392,19 +405,6 @@
- nagios_config
notify: restart nagios
- name: Build out nagios host templates (staging)
template: src=nagios/hosts/{{item}}.j2 dest=/etc/nagios/hosts/{{item}} mode=0644 owner=root group=root
with_items:
- phx2-hosts.cfg
- staging-hosts.cfg
- phx2-mgmt-hosts.cfg
when: env == "staging"
tags:
- nagios_server
- nagios_config
- nagios_hosts
notify: restart nagios
- name: Build out nagios hostgroup templates
template: src=nagios/hostgroups/{{item}}.j2 dest=/etc/nagios/hostgroups/{{item}} mode=0644 owner=root group=root
with_items:
@ -413,7 +413,7 @@
- nomail.cfg
- checkswap.cfg
- checkraid.cfg
when: nagios_location == "internal"
when: nagios_location == 'phx2_internal'
tags:
- nagios_server
- nagios_config
@ -435,7 +435,7 @@
template: src=nagios/servicegroups/{{item}}.j2 dest=/etc/nagios/servicegroups/{{item}} mode=0644 owner=root group=root
with_items:
- mgmt-http.cfg
when: nagios_location == "internal"
when: nagios_location == 'phx2_internal'
tags:
- nagios_server
- nagios_config

View file

@ -8,7 +8,7 @@ Alias /robots.txt /var/www/robots.txt
<Location /nagios>
AuthName "Nagios GSSAPI Login"
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
GssapiCredStore keytab:/etc/krb5.HTTP_nagios{{env_suffix}}.fedoraproject.org.keytab
# This is off because Apache (and thus mod_auth_gssapi) doesn't know this is proxied over TLS
GssapiSSLonly Off

View file

@ -1,7 +1,7 @@
{% for host in groups['all']|sort %}
{% if hostvars[host].datacenter == 'bodhost' and hostvars[host].nagios_Can_Connect == true %}
define host {
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
use defaulttemplate
{% else %}
use mincheck

View file

@ -1,7 +1,7 @@
{% for host in groups['all']|sort %}
{% if hostvars[host].datacenter == 'dedicatedsolutions' and hostvars[host].nagios_Can_Connect == true %}
define host {
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
use defaulttemplate
{% else %}
use mincheck

View file

@ -7,7 +7,7 @@ define host {
alias PHX2 Router
use mincheck
check_command check-host-alive4
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
address 10.5.126.254
{% else %}
address 66.187.228.248
@ -25,8 +25,10 @@ define host {
use mincheck
check_command check-host-alive4
address 152.19.134.129
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% endif %}
}
@ -39,8 +41,10 @@ define host {
use mincheck
check_command check-host-alive4
address 67.219.144.65
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
{% endif %}
@ -54,7 +58,7 @@ define host {
use mincheck
check_command check-host-alive4
address 185.141.164.1
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% else %}
parents ibiblio-gw
@ -69,8 +73,10 @@ define host {
use mincheck
check_command check-host-alive4
address 85.236.55.1
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
{% endif %}
@ -84,8 +90,10 @@ define host {
use mincheck
check_command check-host-alive4
address 140.211.169.193
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
{% endif %}
@ -99,8 +107,10 @@ define host {
use mincheck
check_command check-host-alive4
address 209.132.190.196
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
{% endif %}
@ -114,8 +124,10 @@ define host {
use mincheck
check_command check-host-alive4
address 8.43.85.254
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
{% endif %}
@ -129,8 +141,10 @@ define host {
use mincheck
check_command check-host-alive4
address 209.132.185.254
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
{% endif %}

View file

@ -1,7 +1,7 @@
{% for host in groups['all']|sort %}
{% if hostvars[host].datacenter == 'host1plus' and hostvars[host].nagios_Can_Connect == true %}
define host {
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
use defaulttemplate
{% else %}
use mincheck

View file

@ -1,7 +1,7 @@
{% for host in groups['all']|sort %}
{% if hostvars[host].datacenter == 'ibiblio' and hostvars[host].nagios_Can_Connect == true %}
define host {
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
use defaulttemplate
{% else %}
use mincheck

View file

@ -1,7 +1,7 @@
{% for host in groups['all']|sort %}
{% if hostvars[host].datacenter == 'internetx' and hostvars[host].nagios_Can_Connect == true %}
define host {
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
use defaulttemplate
{% else %}
use mincheck

View file

@ -1,7 +1,7 @@
{% for host in groups['all']|sort %}
{% if hostvars[host].datacenter == 'osuosl' and hostvars[host].nagios_Can_Connect == true %}
define host {
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
use defaulttemplate
{% else %}
use mincheck

View file

@ -3,8 +3,10 @@ define host {
alias status.fedoraproject.org
use mincheck
address d6tcqd4og8l21.cloudfront.net
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
{% endif %}
@ -15,8 +17,10 @@ define host {
alias cdn.registry.fedoraproject.org
use mincheck
address cdn.registry.fedoraproject.org
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
{% endif %}
@ -27,8 +31,10 @@ define host {
alias lists.fedoraproject.org
use mincheck
address lists.fedoraproject.org
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
{% endif %}

View file

@ -1,7 +1,7 @@
{% for host in groups['all']|sort %}
{% if hostvars[host].datacenter == 'rdu-cc' and hostvars[host].nagios_Can_Connect == true %}
define host {
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' or vars['nagios_location'] == 'iad2_internal' %}
use defaulttemplate
{% else %}
use mincheck

View file

@ -1,7 +1,7 @@
{% for host in groups['all']|sort %}
{% if hostvars[host].datacenter == 'rdu' and hostvars[host].nagios_Can_Connect == true %}
define host {
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
use defaulttemplate
{% else %}
use mincheck

View file

@ -144,20 +144,6 @@ define service {
use websitetemplate
}
#define service {
# host_name magazine2.fedorainfracloud.org
# service_description http-magazine
# use websitetemplate
# check_command check_website!magazine2.fedorainfracloud.org!/!Fedora Magazine
#}
#define service {
# host_name communityblog.fedorainfracloud.org
# service_description http-communityblog
# use websitetemplate
# check_command check_website!communityblog.fedorainfracloud.org!/!Fedora Community Blog
#}
define service {
host_name lists.fedoraproject.org
service_description http-lists.fedoraproject.org
@ -165,7 +151,7 @@ define service {
use websitetemplate
}
{% if vars['nagios_location'] == 'internal' %}
{% if vars['nagios_location'] == 'phx2_internal' %}
##
## Other Frontend Websites
@ -235,13 +221,6 @@ define service {
use internalwebsitetemplate
}
define service {
host_name mailman01.phx2.fedoraproject.org
service_description http-mailman01.phx2.fedoraproject.org-internal
check_command check_website!mailman01.phx2.fedoraproject.org!/archives/!Fedora Mailing-Lists
use websitetemplate
}
define service {
hostgroup_name mm_frontend
service_description http-mm-publiclist-internal
@ -326,11 +305,23 @@ define service {
## Auxiliary to websites but necessary to make them happen
define service {
host_name sundries01.phx2.fedoraproject.org
hostgroup_name sundries
service_description websites build happened recently
check_command check_by_nrpe!check_websites_buildtime
use websitetemplate
}
{% endif %}
{% if vars['nagios_location'] == 'phx2_internal' %}
define service {
host_name mailman01.phx2.fedoraproject.org
service_description http-mailman-internal
check_command check_website!mailman01.phx2.fedoraproject.org!/archives/!Fedora Mailing-Lists
use websitetemplate
}
{% endif %}

View file

@ -310,8 +310,6 @@ command[check_temp]=/usr/lib64/nagios/plugins/check_ipmi -t
command[check_fans]=/usr/lib64/nagios/plugins/check_ipmi -f
command[check_mirrorlist_cache]=/usr/lib64/nagios/plugins/check_file_age -w 86400 -c 129600 -f /var/lib/mirrormanager/mirrorlist_cache.proto
command[check_mysql_backup]=/usr/lib64/nagios/plugins/check_file_age -w 86400 -c 129600 -f /backups/fpo-mediawiki.xz
command[check_pgsql_backup]=/usr/lib64/nagios/plugins/check_file_age -w 86400 -c 129600 -f /backups/db-fas01.phx2.fedoraproject.org/fas2.db
command[check_puppetmaster]=/usr/lib64/nagios/plugins/check_procs -c 8:8 -a '/usr/bin/ruby /usr/sbin/puppetmasterd' -u puppet
command[check_supervisor]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -a '/usr/bin/python /usr/bin/supervisord' -u root
command[check_lock]=/usr/lib64/nagios/plugins/check_lock
command[check_lock_file_age]=/usr/lib64/nagios/plugins/check_lock_file_age -w 1 -c 5 -f /var/lock/fedora-ca/lock
@ -446,3 +444,9 @@ command[service_rsyslog_restart]=/usr/bin/sudo /sbin/service rsyslog restart
#command[check_load]=/usr/lib64/nagios/plugins/check_load -w $ARG1$ -c $ARG2$
#command[check_disk]=/usr/lib64/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
#command[check_procs]=/usr/lib64/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
{# check_pgsql_backup checks the age of the fas2.db backup file; the backup
   directory name differs between the phx2 and iad2 nagios locations. For any
   other nagios_location no check_pgsql_backup command is emitted. #}
{% if vars['nagios_location'] == 'phx2_internal' %}
command[check_pgsql_backup]=/usr/lib64/nagios/plugins/check_file_age -w 86400 -c 129600 -f /backups/db-fas01.phx2.fedoraproject.org/fas2.db
{% elif vars['nagios_location'] == 'iad2_internal' %}
command[check_pgsql_backup]=/usr/lib64/nagios/plugins/check_file_age -w 86400 -c 129600 -f /backups/db-fas01.iad2.fedoraproject.org/fas2.db
{% endif %}