script to monitor systemd units on pagure
This commit is contained in:
parent
aace9bb2cc
commit
890dd31cb0
5 changed files with 68 additions and 1 deletions
|
@ -304,6 +304,17 @@
|
||||||
tags:
|
tags:
|
||||||
- nagios_client
|
- nagios_client
|
||||||
|
|
||||||
|
- name: install nrpe check for systemd units
|
||||||
|
template:
|
||||||
|
src={{ item }}.j2 dest=/etc/nrpe.d/{{ item }} owner=root group=root mode=0644
|
||||||
|
with_items:
|
||||||
|
- check_systemd_units.cfg
|
||||||
|
when: inventory_hostname.startswith('pagure02')
|
||||||
|
notify:
|
||||||
|
- restart nrpe
|
||||||
|
tags:
|
||||||
|
- nagios_client
|
||||||
|
|
||||||
- name: Check if the fedmsg group exists
|
- name: Check if the fedmsg group exists
|
||||||
shell: /usr/bin/getent group fedmsg | /usr/bin/wc -l | tr -d ' '
|
shell: /usr/bin/getent group fedmsg | /usr/bin/wc -l | tr -d ' '
|
||||||
register: fedmsg_exists
|
register: fedmsg_exists
|
||||||
|
|
1
roles/nagios_client/templates/check_systemd_units.cfg.j2
Normal file
1
roles/nagios_client/templates/check_systemd_units.cfg.j2
Normal file
|
@ -0,0 +1 @@
|
||||||
|
command[check_systemd_units]=/usr/lib64/nagios/plugins/check_systemd_units
|
48
roles/nagios_server/files/nagios/plugins/check_systemd_units
Executable file
48
roles/nagios_server/files/nagios/plugins/check_systemd_units
Executable file
|
@ -0,0 +1,48 @@
|
||||||
|
#!/usr/bin/env bash
#
# Description : Nagios/NRPE check for a fixed list of systemd units.
#               Every unit found in the "failed" state is restarted once;
#               units whose restart command fails are reported as WARNING.
#
# Output      : exactly one status line on stdout (Nagios displays the first
#               line of plugin output, so nothing else may be printed there).
# Exit codes  : 0 (OK)      - no unit is left in the failed state
#               1 (WARNING) - at least one failed unit could not be restarted
#
# Author      : Seddik Alaoui Ismaili
# Version     : 1.0

# Exit codes
readonly warning_exit=1
readonly ok_exit=0

# Units to check
readonly unit_list=(
  pagure_ci
  pagure_ev
  pagure_fast_worker
  pagure_loadjson
  pagure_logcom
  pagure_medium_worker
  pagure_milter
  pagure_mirror
  pagure_slow_worker
  pagure_webhook
  pagure_worker
  pagure_mirror_project_in.timer
)

# failed_array : failed units whose restart did not succeed
# active_array : failed units that were restarted successfully
failed_array=()
active_array=()

# Check each unit and try a single restart of the failed ones.
for element in "${unit_list[@]}"; do
  # `systemctl is-failed` exits 0 only when the unit is in the "failed" state,
  # which avoids parsing the human-readable `systemctl status` output and the
  # empty-string comparison error that parsing caused for unknown units.
  if systemctl is-failed --quiet "${element}"; then
    if systemctl restart "${element}"; then
      active_array+=("${element}")
    else
      failed_array+=("${element}")
    fi
  fi
done

# Print the result line and exit with the matching Nagios status code.
if (( ${#failed_array[@]} > 0 )); then
  printf 'WARNING - Failed systemd units after restart : %s\n' "${failed_array[*]}"
  exit "${warning_exit}"
fi

if (( ${#active_array[@]} > 0 )); then
  # Tell the operator that an automatic restart took place.
  printf 'OK - Systemd units are active (restarted: %s)\n' "${active_array[*]}"
else
  printf 'OK - Systemd units are active\n'
fi
exit "${ok_exit}"
|
|
@ -57,3 +57,10 @@ define service {
|
||||||
use defaulttemplate
|
use defaulttemplate
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define service {
|
||||||
|
hostgroup pagure
|
||||||
|
service_description Systemd Units
|
||||||
|
check_command check_by_nrpe!check_systemd_units
|
||||||
|
use defaulttemplate
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -421,7 +421,7 @@ command[check_fedmsg_cbacklog_mbs_backend_hub]=/usr/lib64/nagios/plugins/check_f
|
||||||
|
|
||||||
command[check_fedmsg_fmn_digest_last_ran]=/usr/lib64/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub DigestProducer 90 600
|
command[check_fedmsg_fmn_digest_last_ran]=/usr/lib64/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub DigestProducer 90 600
|
||||||
command[check_fedmsg_fmn_confirm_last_ran]=/usr/lib64/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub ConfirmationProducer 30 300
|
command[check_fedmsg_fmn_confirm_last_ran]=/usr/lib64/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub ConfirmationProducer 30 300
|
||||||
|
command[check_systemd_units]=/usr/lib64/nagios/plugins/check_systemd_units
|
||||||
# The following are 'action commands' where by an actual action is performed
|
# The following are 'action commands' where by an actual action is performed
|
||||||
# like restarting httpd
|
# like restarting httpd
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue