From 309ec254bfeda81cfffc148b3685272fe175162f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Bompard?=
Date: Mon, 12 Aug 2019 14:49:56 +0200
Subject: [PATCH] Add thresholds on Bodhi's RabbitMQ queues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Aurélien Bompard
---
Review notes (this section is ignored when the patch is applied):
- nrpe.cfg.j2: each optional threshold flag carries its own leading space,
  so "-w" and "-c" can no longer render fused together ("-w 10-c 100").
- tasks/main.yml: the file mode is quoted ("0644") and the role comment
  grammar is fixed; the touched/new files now end with a newline.
- Context-line indentation was reconstructed; use
  "git apply --ignore-whitespace" if it does not match the tree.
- The blob ids on the "index" lines were not recomputed.

 playbooks/groups/bodhi-backend.yml         |  3 +++
 playbooks/openshift-apps/bodhi.yml         |  3 +++
 roles/rabbit/queue/defaults/main.yml       |  4 +++-
 roles/rabbit/queue/tasks/main.yml          | 25 ++++++++++++++++++++++
 roles/rabbit/queue/templates/nagios.cfg.j2 |  6 ++++++
 roles/rabbit/queue/templates/nrpe.cfg.j2   |  1 +
 6 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 roles/rabbit/queue/templates/nagios.cfg.j2
 create mode 100644 roles/rabbit/queue/templates/nrpe.cfg.j2

diff --git a/playbooks/groups/bodhi-backend.yml b/playbooks/groups/bodhi-backend.yml
index c5cbbda52c..20ac342d62 100644
--- a/playbooks/groups/bodhi-backend.yml
+++ b/playbooks/groups/bodhi-backend.yml
@@ -71,6 +71,9 @@
     username: "bodhi{{ env_suffix }}"
     queue_name: "{{ bodhi_message_queue_name }}"
     routing_keys: "{{ bodhi_message_routing_keys }}"
+    thresholds:
+      warning: 10
+      critical: 100
 
   tasks:
 
diff --git a/playbooks/openshift-apps/bodhi.yml b/playbooks/openshift-apps/bodhi.yml
index 43e90902ea..0f7f7f6156 100644
--- a/playbooks/openshift-apps/bodhi.yml
+++ b/playbooks/openshift-apps/bodhi.yml
@@ -26,6 +26,9 @@
     username: "bodhi{{ env_suffix }}"
     queue_name: "{{ bodhi_message_queue_name }}"
     routing_keys: "{{ bodhi_message_routing_keys }}"
+    thresholds:
+      warning: 10
+      critical: 100
   - role: openshift/project
     app: bodhi
     description: bodhi
diff --git a/roles/rabbit/queue/defaults/main.yml b/roles/rabbit/queue/defaults/main.yml
index 4f6318dc33..35006ad7c4 100644
--- a/roles/rabbit/queue/defaults/main.yml
+++ b/roles/rabbit/queue/defaults/main.yml
@@ -1,5 +1,7 @@
-rabbitmq_server: "rabbitmq01{{ env_suffix }}.phx2.fedoraproject.org"
+rabbitmq_server: "rabbitmq03{{ env_suffix }}.phx2.fedoraproject.org"
 vhost: /pubsub
 default_exchange: amq.topic
 routing_keys:
   - "#"
+thresholds: false
+nagios_server: noc01.phx2.fedoraproject.org
diff --git a/roles/rabbit/queue/tasks/main.yml b/roles/rabbit/queue/tasks/main.yml
index 846b3bcced..9c6eccb0b8 100644
--- a/roles/rabbit/queue/tasks/main.yml
+++ b/roles/rabbit/queue/tasks/main.yml
@@ -17,6 +17,9 @@
 #
 # - write_queues (list): A list of queue name prefixes to which the user will
 #   be allowed to publish.
+# - thresholds (dict): A dictionary with two keys: "warning" and "critical".
+#   The values are numbers. Generate an alert in Nagios if
+#   the number of messages goes above these values.
 
 - assert:
     that:
@@ -61,3 +64,25 @@
     login_user: admin
     login_password: "{{ (env == 'production')|ternary(rabbitmq_admin_password_production, rabbitmq_admin_password_staging) }}"
   loop: "{{ routing_keys }}"
+
+- name: Monitor the {{ queue_name }} queue in Nagios (NRPE)
+  when: thresholds and env == "production"
+  delegate_to: "{{ rabbitmq_server }}"
+  template:
+    src: nrpe.cfg.j2
+    dest: /etc/nrpe.d/check_rabbitmq_queue_{{ queue_name }}.cfg
+    owner: root
+    group: root
+    mode: "0644"
+  notify:
+    - restart nrpe
+  tags:
+    - nagios_client
+
+- name: Monitor the {{ queue_name }} queue in Nagios
+  when: thresholds and env == "production"
+  delegate_to: "{{ nagios_server }}"
+  template:
+    src: nagios.cfg.j2
+    dest: /etc/nagios/services/rabbitmq-queue-{{ queue_name }}.cfg
+  notify: restart nagios
diff --git a/roles/rabbit/queue/templates/nagios.cfg.j2 b/roles/rabbit/queue/templates/nagios.cfg.j2
new file mode 100644
index 0000000000..2a995a0a37
--- /dev/null
+++ b/roles/rabbit/queue/templates/nagios.cfg.j2
@@ -0,0 +1,6 @@
+define service {
+  host_name             {{ rabbitmq_server }}
+  service_description   Check queue {{ queue_name }}
+  check_command         check_by_nrpe!check_rabbitmq_queue_{{ vhost.replace("/", "") }}_{{ queue_name }}
+  use                   defaulttemplate
+}
diff --git a/roles/rabbit/queue/templates/nrpe.cfg.j2 b/roles/rabbit/queue/templates/nrpe.cfg.j2
new file mode 100644
index 0000000000..23b04c832e
--- /dev/null
+++ b/roles/rabbit/queue/templates/nrpe.cfg.j2
@@ -0,0 +1 @@
+command[check_rabbitmq_queue_{{ vhost.replace("/", "") }}_{{ queue_name }}]=/usr/lib64/nagios/plugins-rabbitmq/check_rabbitmq_queue --extra-opts=common@/etc/nrpe.d/rabbitmq_args.ini --vhost {{ vhost }} --queue {{ queue_name }}{% if thresholds.warning %} -w {{ thresholds.warning }}{% endif %}{% if thresholds.critical %} -c {{ thresholds.critical }}{% endif %}