From eae91f0d2befe47e4a0cf439e8f0306f7feb0cb3 Mon Sep 17 00:00:00 2001
From: seddikalaouiismaili
Date: Fri, 12 Mar 2021 01:38:33 +0100
Subject: [PATCH] install nrpe check for systemd units

---
Reviewer note: the "index" blob hashes below are carried over from the
original patch and no longer match the edited content; regenerate the patch
with git format-patch before relying on them (e.g. for git am --3way).

 .../files/scripts/check_systemd_units         | 56 +++++++++++++++++++
 roles/nagios_client/tasks/main.yml            |  8 ++++
 2 files changed, 64 insertions(+)
 create mode 100644 roles/nagios_client/files/scripts/check_systemd_units

diff --git a/roles/nagios_client/files/scripts/check_systemd_units b/roles/nagios_client/files/scripts/check_systemd_units
new file mode 100644
index 0000000000..accdc9ab20
--- /dev/null
+++ b/roles/nagios_client/files/scripts/check_systemd_units
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+# Description : NRPE check for the status of the pagure systemd units.
+# Every unit found in the "failed" state is restarted once; the check only
+# warns about the units whose restart command did not succeed.
+#
+# Output : a single Nagios status line on stdout.
+# Exit codes : 0 (OK) when no restart failed, 1 (WARNING) otherwise.
+
+# Author : Seddik Alaoui Ismaili
+# Version : 1.0
+
+
+# Exit codes
+readonly warning_exit=1
+readonly ok_exit=0
+
+# Unit list
+readonly unit_list=(
+  pagure_ci
+  pagure_ev
+  pagure_fast_worker
+  pagure_loadjson
+  pagure_logcom
+  pagure_medium_worker
+  pagure_milter
+  pagure_mirror
+  pagure_slow_worker
+  pagure_webhook
+  pagure_worker
+  pagure_mirror_project_in.timer
+)
+
+# Units whose restart attempt failed
+failed_array=()
+
+# Check the status of each unit
+for element in "${unit_list[@]}"; do
+  # "is-failed" exits 0 only for a unit in the "failed" state, so a unit
+  # systemd does not know about is skipped instead of breaking the test.
+  if systemctl is-failed --quiet "${element}"; then
+    # Restart once; only a failing restart is reported to nagios.
+    if ! systemctl restart "${element}"; then
+      failed_array+=("${element}")
+    fi
+  fi
+done
+
+# Print the single status line and exit with the matching code for nagios
+if (( ${#failed_array[@]} > 0 )); then
+  echo "WARNING - Failed systemd units after restart : ${failed_array[*]}"
+  exit "${warning_exit}"
+fi
+
+echo "OK - Systemd units are active"
+exit "${ok_exit}"
diff --git a/roles/nagios_client/tasks/main.yml b/roles/nagios_client/tasks/main.yml
index 970bfdda85..6a5e0f6436 100644
--- a/roles/nagios_client/tasks/main.yml
+++ b/roles/nagios_client/tasks/main.yml
@@ -54,6 +54,14 @@
   tags:
   - nagios_client
 
+- name: install nrpe check for systemd unit
+  copy: src="scripts/{{ item }}" dest="{{ libdir }}/nagios/plugins/{{ item }}" mode=0775 owner=nagios group=nagios
+  with_items:
+  - check_systemd_units
+  when: inventory_hostname.startswith('pagure')
+  tags:
+  - nagios_client
+
 # create dirs
 # puppet used to make /var/spool/nagios (owned by nagios.nagios) mode 750
 # and /usr/lib/nagios/plugins (owned by root) mode 755 - but we don't know WHY