Try to do nagios downtimes better for fmn upgrades.

2015-03-19 15:07:32 +00:00 · 2015-03-19 15:07:32 +00:00 · f26d80d3b1
commit f26d80d3b1
parent 857adabe4f
1 changed file with 9 additions and 7 deletions
--- a/playbooks/manual/upgrade/fmn.yml
+++ b/playbooks/manual/upgrade/fmn.yml
@@ -33,7 +33,7 @@
  pre_tasks:
  - name: tell nagios to shush w.r.t. the frontend
-    nagios: action=downtime minutes=60 service=host host={{ inventory_hostname }}
+    nagios: action=downtime minutes=15 service=host host={{ inventory_hostname }}
    delegate_to: noc01.phx2.fedoraproject.org
    ignore_errors: true
@@ -55,7 +55,7 @@
  pre_tasks:
  - name: tell nagios to shush w.r.t. the backend
-    nagios: action=downtime minutes=60 service=host host={{ inventory_hostname }}
+    nagios: action=downtime minutes=15 service=host host={{ inventory_hostname }}
    delegate_to: noc01.phx2.fedoraproject.org
    ignore_errors: true
@@ -76,11 +76,13 @@
  - name: And... start the backend again
    service: name="fedmsg-hub" state=started
-  post_tasks:
+  # Don't bother unshushing the backend here.  it takes a few minutes to start
-  - name: tell nagios to unshush w.r.t. the backend
+  # up anyways, so just let the downtime expire.
-    nagios: action=unsilence service=host host={{ inventory_hostname }}
+  #post_tasks:
-    delegate_to: noc01.phx2.fedoraproject.org
+  #- name: tell nagios to unshush w.r.t. the backend
-    ignore_errors: true
+  #  nagios: action=unsilence service=host host={{ inventory_hostname }}
+  #  delegate_to: noc01.phx2.fedoraproject.org
+  #  ignore_errors: true
 - name: restart the frontend
  hosts: notifs-web;notifs-web-stg