nagios_server: adjust a bunch more things for iad2.

Signed-off-by: Kevin Fenzi <kevin@scrye.com>
This commit is contained in:
Kevin Fenzi 2020-06-30 15:39:32 -07:00
parent 5a7245bf26
commit 632d4a0273
14 changed files with 99 additions and 205 deletions

View file

@ -30,134 +30,86 @@ csi_primary_contact: Fedora Admins - admin@fedoraproject.org
csi_purpose: Monitoring system
#
# This is a list of hosts which are in the PHX2 130 mgmt network
# This is a list of hosts which are in the IAD2 160 mgmt network
# They are not in the ansible inventory because ansible would try to
# connect to them, and they do not support ansible.
#
phx2_management_hosts:
- backup01.mgmt.fedoraproject.org
- bkernel03.mgmt.fedoraproject.org
- bvirthost01.mgmt.fedoraproject.org
- bvirthost04.mgmt.fedoraproject.org
- bvirthost05.mgmt.fedoraproject.org
- data-analysis01.mgmt.fedoraproject.org
- dell-fx01-01.mgmt.fedoraproject.org
- dell-fx01-02.mgmt.fedoraproject.org
- dell-fx01-03.mgmt.fedoraproject.org
- dell-fx01-04.mgmt.fedoraproject.org
- dell-fx01-05.mgmt.fedoraproject.org
- dell-fx01-06.mgmt.fedoraproject.org
- dell-fx01-07.mgmt.fedoraproject.org
- dell-fx01-08.mgmt.fedoraproject.org
- dell-fx01.mgmt.fedoraproject.org
- dell-fx02-01.mgmt.fedoraproject.org
- dell-fx02-02.mgmt.fedoraproject.org
- dell-fx02-03.mgmt.fedoraproject.org
- dell-fx02-04.mgmt.fedoraproject.org
- dell-fx02-05.mgmt.fedoraproject.org
- dell-fx02-06.mgmt.fedoraproject.org
- dell-fx02-07.mgmt.fedoraproject.org
- dell-fx02-08.mgmt.fedoraproject.org
- dell-fx02.mgmt.fedoraproject.org
- qa09.mgmt.fedoraproject.org
- qa14.mgmt.fedoraproject.org
- retrace01.mgmt.fedoraproject.org
- retrace02.mgmt.fedoraproject.org
- sign-vault05.mgmt.fedoraproject.org
- virthost-comm03.mgmt.fedoraproject.org
- virthost-comm04.mgmt.fedoraproject.org
- virthost01-stg.mgmt.fedoraproject.org
- virthost02.mgmt.fedoraproject.org
- virthost03.mgmt.fedoraproject.org
- bvirthost01-stg.mgmt.fedoraproject.org
- virthost02-stg.mgmt.fedoraproject.org
- virthost06.mgmt.fedoraproject.org
- virthost03-stg.mgmt.fedoraproject.org
- virthost15.mgmt.fedoraproject.org
- virthost05-stg.mgmt.fedoraproject.org
- virthost17.mgmt.fedoraproject.org
- virthost18.mgmt.fedoraproject.org
- virthost19.mgmt.fedoraproject.org
- virthost04-stg.mgmt.fedoraproject.org
- virthost21.mgmt.fedoraproject.org
- virthost22.mgmt.fedoraproject.org
iad2_management_hosts:
- backup01.mgmt.iad2.fedoraproject.org
- bkernel01.mgmt.iad2.fedoraproject.org
- bvmhost-x86-01.mgmt.iad2.fedoraproject.org
- bvmhost-x86-02.mgmt.iad2.fedoraproject.org
- bvmhost-x86-03.mgmt.iad2.fedoraproject.org
- bvmhost-x86-04.mgmt.iad2.fedoraproject.org
- bvmhost-x86-05.mgmt.iad2.fedoraproject.org
- bvmhost-x86-06.mgmt.iad2.fedoraproject.org
- bvmhost-x86-07.mgmt.iad2.fedoraproject.org
- dell-fx01-fc01.mgmt.iad2.fedoraproject.org
- dell-fx01-fc02.mgmt.iad2.fedoraproject.org
- dell-fx01-fc03.mgmt.iad2.fedoraproject.org
- dell-fx01-fc04.mgmt.iad2.fedoraproject.org
- dell-fx01-fc05.mgmt.iad2.fedoraproject.org
- dell-fx01-fc06.mgmt.iad2.fedoraproject.org
- dell-fx01-fc07.mgmt.iad2.fedoraproject.org
- dell-fx01-fc08.mgmt.iad2.fedoraproject.org
- dell-fx01.mgmt.iad2.fedoraproject.org
- dell-fx02-fc01.mgmt.iad2.fedoraproject.org
- dell-fx02-fc02.mgmt.iad2.fedoraproject.org
- dell-fx02-fc03.mgmt.iad2.fedoraproject.org
- dell-fx02-fc04.mgmt.iad2.fedoraproject.org
- dell-fx02-fc05.mgmt.iad2.fedoraproject.org
- dell-fx02-fc06.mgmt.iad2.fedoraproject.org
- dell-fx02-fc07.mgmt.iad2.fedoraproject.org
- dell-fx02-fc08.mgmt.iad2.fedoraproject.org
- dell-fx02.mgmt.iad2.fedoraproject.org
- openqa-x86-worker01.mgmt.iad2.fedoraproject.org
- qvmhost-02.mgmt.iad2.fedoraproject.org
- vmhost-x86-01.mgmt.iad2.fedoraproject.org
- vmhost-x86-02.mgmt.iad2.fedoraproject.org
- vmhost-x86-03.mgmt.iad2.fedoraproject.org
- vmhost-x86-04.mgmt.iad2.fedoraproject.org
- vmhost-x86-05.mgmt.iad2.fedoraproject.org
- vmhost-x86-06.mgmt.iad2.fedoraproject.org
- vmhost-x86-07.mgmt.iad2.fedoraproject.org
#
# These are management interfaces we only want
# to test ping against. No http/https
#
phx2_management_limited:
- moonshot01-ilo.mgmt.fedoraproject.org
- moonshot01-sw1.mgmt.fedoraproject.org
- moonshot01-sw2.mgmt.fedoraproject.org
- opengear01.mgmt.fedoraproject.org
- qa05.mgmt.fedoraproject.org
- qa07.mgmt.fedoraproject.org
- sign-vault03.mgmt.fedoraproject.org
- sign-vault04.mgmt.fedoraproject.org
iad2_management_limited:
- opengear01.mgmt.iad2.fedoraproject.org
- opengear02.mgmt.iad2.fedoraproject.org
- sign-vault01.mgmt.iad2.fedoraproject.org
phx2_management_slowping:
- ppc8-01-fsp.mgmt.fedoraproject.org
- ppc8-02-fsp.mgmt.fedoraproject.org
- ppc8-03-fsp.mgmt.fedoraproject.org
#iad2_management_slowping:
# - ppc8-01-fsp.mgmt.fedoraproject.org
# - ppc8-02-fsp.mgmt.fedoraproject.org
# - ppc8-03-fsp.mgmt.fedoraproject.org
phx2_external:
iad2_external:
- bastion-comm01.fedoraproject.org
- bastion01.fedoraproject.org
- bastion02.fedoraproject.org
- koji.fedoraproject.org
- kojipkgs.fedoraproject.org
- ns04.fedoraproject.org
- pkgs.fedoraproject.org
- proxy01.fedoraproject.org
- proxy10.fedoraproject.org
- puppet.fedoraproject.org
- retrace01.fedoraproject.org
- secondary01.fedoraproject.org
iad2_external:
- proxy-iad01.fedoraproject.org
- proxy-iad02.fedoraproject.org
- dl-iad01.fedoraproject.org
- dl-iad02.fedoraproject.org
- dl-iad03.fedoraproject.org
- dl-iad04.fedoraproject.org
- dl-iad05.fedoraproject.org
- infrastructure.fedoraproject.org
- koji.fedoraproject.org
- kojipkgs.fedoraproject.org
- ns-iad01.fedoraproject.org
- ns-iad02.fedoraproject.org
- bastion-iad01.fedoraproject.org
- pkgs.fedoraproject.org
- proxy01.fedoraproject.org
- proxy10.fedoraproject.org
- retrace01.fedoraproject.org
- secondary01.fedoraproject.org
# When you have a group which comes up with empty members in all.cfg, it
# is because it contains only hosts which aren't pingable. You may want
# to add that group to this list. Other items on this list are
# enormous groups that are not needed.
# Exclude these ansible host groups in hostgroups/all.cfg
exclude_phx2_hostgroups:
- nixnagios
- dbgserver_stg
- virt_guest
- virt_host
- iad2
- download_iad2
- openqa_iad
- download_tier1
- datacenter_iad2
- sign_vault
- bugzilla2fedmsg
- github2fedmsg
- moksha_hubs
- rabbitmq
- mm
- mm_backend
- mm_frontend
- notifs_backend
- notifs_web
# Ansible host groups that the nagios hostgroup template skips: it emits a
# hostgroup for each non-empty ansible group whose name is not in this list
# (the template tests `key not in vars['exclude_iad2_hostgroups']`).
exclude_iad2_hostgroups:
- nixnagios
- dbgserver_stg
- virt_guest
- virt_host
- iad2
- openqa_iad
- sign_vault

View file

@ -416,8 +416,8 @@
- name: Build out nagios host templates (production)
template: src=nagios/hosts/{{item}}.j2 dest=/etc/nagios/hosts/{{item}} mode=0644 owner=root group=root
with_items:
- phx2-hosts.cfg
- phx2-mgmt-hosts.cfg
- iad2-hosts.cfg
- iad2-mgmt-hosts.cfg
- staging-hosts.cfg
when: env == "production" and nagios_location == 'iad2_internal'
tags:
@ -441,7 +441,6 @@
- name: Build out nagios host templates (production)
template: src=nagios/hosts/{{item}}.j2 dest=/etc/nagios/hosts/{{item}} mode=0644 owner=root group=root
with_items:
- phx2-external.cfg
- iad2-external.cfg
when: env == "production" and nagios_location == "external"
tags:
@ -478,7 +477,7 @@
- name: Build out nagios services templates
template: src=nagios/services/{{item}}.j2 dest=/etc/nagios/services/{{item}} mode=0644 owner=root group=root
with_items:
- phx2-mgmt.cfg
- iad2-mgmt.cfg
when: nagios_location == 'iad2_internal'
tags:
- nagios_server

View file

@ -38,7 +38,7 @@ define hostgroup{
define hostgroup{
hostgroup_name routers
alias routers
members phx2-gw, ibiblio-gw, dedicated-gw, host1plus-gw, internetx-gw, osuosl-gw, rdu-gw, rdu-cc-gw, iad2-gw
members ibiblio-gw, dedicated-gw, host1plus-gw, internetx-gw, osuosl-gw, rdu-gw, rdu-cc-gw, iad2-gw
}

View file

@ -3,7 +3,7 @@
###############
## {{ env }}
{% for key, value in groups.items()|sort %}
{% if groups[key] != [] and key not in vars['exclude_phx2_hostgroups'] %}
{% if groups[key] != [] and key not in vars['exclude_iad2_hostgroups'] %}
define hostgroup{
hostgroup_name {{ key }}
alias {{ key }}
@ -38,17 +38,17 @@ define hostgroup{
define hostgroup{
hostgroup_name routers
alias routers
members phx2-gw, ibiblio-gw, dedicated-gw, host1plus-gw, internetx-gw, osuosl-gw, rdu-gw, rdu-cc-gw, iad2-gw
members ibiblio-gw, dedicated-gw, host1plus-gw, internetx-gw, osuosl-gw, rdu-gw, rdu-cc-gw, iad2-gw
}
##
## Management hardware
#define hostgroup {
# hostgroup_name phx2_mgmt_systems
# alias phx2_mgmt_systems
# members {% for host in vars['phx2_management_hosts']|sort %}{{host}}{% if not loop.last %},{% endif %} {% endfor %}
#
#}
# Management hardware
#
define hostgroup {
hostgroup_name iad2_mgmt_systems
alias iad2_mgmt_systems
members {% for host in vars['iad2_management_hosts']|sort %}{{host}}{% if not loop.last %},{% endif %} {% endfor %}
}
# TODO: Add iad2 mgmt here

View file

@ -3,16 +3,15 @@
# be not defined on the external nagios.
#
define host {
host_name phx2-gw
host_name iad2-gw
alias PHX2 Router
use mincheck
check_command check-host-alive4
{% if vars['nagios_location'] == 'iad2_internal' %}
address 10.5.126.254
{% else %}
address 10.3.163.254
parents iad2-gw
{% elif vars['nagios_location'] == 'external' %}
address 66.187.228.248
{% endif %}
{% if vars['nagios_location'] == 'external' %}
parents ibiblio-gw
{% endif %}
@ -26,9 +25,10 @@ define host {
check_command check-host-alive4
address 152.19.134.129
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% elif vars['nagios_location'] == 'external' %}
address 66.187.228.248
parents ibiblio-gw
{% endif %}
}
@ -42,10 +42,9 @@ define host {
check_command check-host-alive4
address 67.219.144.65
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
{% elif vars['nagios_location'] == 'external' %}
address 66.187.228.248
parents ibiblio-gw
{% endif %}
@ -59,8 +58,9 @@ define host {
check_command check-host-alive4
address 185.141.164.1
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% else %}
parents iad2-gw
{% elif vars['nagios_location'] == 'external' %}
address 66.187.228.248
parents ibiblio-gw
{% endif %}
@ -74,10 +74,9 @@ define host {
check_command check-host-alive4
address 85.236.55.1
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
{% elif vars['nagios_location'] == 'external' %}
address 66.187.228.248
parents ibiblio-gw
{% endif %}
@ -91,10 +90,9 @@ define host {
check_command check-host-alive4
address 140.211.169.193
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
{% elif vars['nagios_location'] == 'external' %}
address 66.187.228.248
parents ibiblio-gw
{% endif %}
@ -108,10 +106,9 @@ define host {
check_command check-host-alive4
address 209.132.190.196
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
{% elif vars['nagios_location'] == 'external' %}
address 66.187.228.248
parents ibiblio-gw
{% endif %}
@ -125,10 +122,9 @@ define host {
check_command check-host-alive4
address 8.43.85.254
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
{% elif vars['nagios_location'] == 'external' %}
address 66.187.228.248
parents ibiblio-gw
{% endif %}
@ -141,9 +137,9 @@ define host {
use mincheck
check_command check-host-alive4
{% if vars['nagios_location'] == 'iad2_internal' %}
address 10.3.163.254
{% else %}
address 209.132.185.254
parents iad2-gw
{% elif vars['nagios_location'] == 'external' %}
address 66.187.228.248
parents ibiblio-gw
{% endif %}

View file

@ -24,7 +24,7 @@ define host {
{% if hostvars[host]['vmhost'] is defined %}
parents {{ hostvars[host]['vmhost'] }}
{% elif hostvars[host].datacenter is defined %}
parents phx2-gw
parents iad2-gw
{% endif %}
}

View file

@ -1,4 +1,4 @@
{% for host in vars['phx2_management_hosts']|sort %}
{% for host in vars['iad2_management_hosts']|sort %}
define host {
host_name {{ host }}
alias {{ host }}
@ -11,7 +11,7 @@ define host {
##
## These hosts may have limited ssh/http/https
{% for host in vars['phx2_management_limited']|sort %}
{% for host in vars['iad2_management_limited']|sort %}
define host {
host_name {{ host }}
alias {{ host }}

View file

@ -4,8 +4,6 @@ define host {
use mincheck
address d6tcqd4og8l21.cloudfront.net
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
@ -18,8 +16,6 @@ define host {
use mincheck
address cdn.registry.fedoraproject.org
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw
@ -32,8 +28,6 @@ define host {
use mincheck
address lists.fedoraproject.org
{% if vars['nagios_location'] == 'iad2_internal' %}
parents phx2-gw
{% elif vars['nagios_location'] == 'iad2_internal' %}
parents iad2-gw
{% else %}
parents ibiblio-gw

View file

@ -1,9 +0,0 @@
{% for host in vars["phx2_external"]|sort %}
define host {
host_name {{ host }}
alias {{ host }}
use mincheck
address {{ host }}
parents phx2-gw
}
{% endfor %}

View file

@ -1,34 +0,0 @@
{% for host in groups['all']|sort %}
{% if hostvars[host].datacenter == 'phx2' and hostvars[host].nagios_Can_Connect == true %}
define host {
{% if hostvars[host].nagios_Check_Services['nrpe'] == true %}
use defaulttemplate
{% else %}
use mincheck
{% endif %}
host_name {{ host }}
{% if hostvars[host].ansible_hostname is defined %}
alias {{ hostvars[host].ansible_hostname }}
{% else %}
alias {{ host }}
{% endif %}
{% if hostvars[host].eth0_ip is defined %}
address {{ hostvars[host].eth0_ip }}
{% elif hostvars[host].em3_ip is defined %}
address {{ hostvars[host].em3_ip }}
{% elif hostvars[host].ansible_default_ipv4 is defined %}
{% if hostvars[host].ansible_default_ipv4.address is defined %}
address {{ hostvars[host].ansible_default_ipv4.address }}
{% endif %}
{% else %}
address {{ host }}
{% endif %}
{% if hostvars[host]['vmhost'] is defined %}
parents {{ hostvars[host]['vmhost'] }}
{% elif hostvars[host].datacenter is defined %}
parents phx2-gw
{% endif %}
}
{% endif %}
{% endfor %}

View file

@ -23,11 +23,11 @@ define host {
{% endif %}
{% if env == 'staging' %}
## Some staging parents do not allow ssh from staging. Just default to gw
parents phx2-gw
parents iad2-gw
{% elif hostvars[host]['vmhost'] is defined %}
parents {{ hostvars[host]['vmhost'] }}
{% elif hostvars[host].datacenter is defined %}
parents phx2-gw
parents iad2-gw
{% endif %}
}

View file

@ -1,6 +1,6 @@
# HTTP
{% for host in vars['phx2_management_hosts']|sort %}
{% for host in vars['iad2_management_hosts']|sort %}
define service {
host_name {{ host }}
service_description {{ host }}-http

View file

@ -445,8 +445,4 @@ command[service_rsyslog_restart]=/usr/bin/sudo /sbin/service rsyslog restart
#command[check_disk]=/usr/lib64/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
#command[check_procs]=/usr/lib64/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
{% if vars['nagios_location'] == 'iad2_internal' %}
command[check_pgsql_backup]=/usr/lib64/nagios/plugins/check_file_age -w 86400 -c 129600 -f /backups/db-fas01.phx2.fedoraproject.org/fas2.db
{% elif vars['nagios_location'] == 'iad2_internal' %}
command[check_pgsql_backup]=/usr/lib64/nagios/plugins/check_file_age -w 86400 -c 129600 -f /backups/db-fas01.iad2.fedoraproject.org/fas2.db
{% endif %}

View file

@ -6,4 +6,4 @@ message_ttl: null
thresholds:
warning: 10000
critical: 100000
nagios_server: noc01.phx2.fedoraproject.org
nagios_server: noc01.iad2.fedoraproject.org