#
# This playbook lets you safely reboot a virthost and all its guests.
#
# requires --extra-vars="target=<fqdn of the virthost>"
# Optionally add nodns=true or nonagios=true to the extra vars.
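#
# Example invocation (hostname is hypothetical):
#   ansible-playbook vhost_reboot.yml --extra-vars="target=vmhost01.example.org nonagios=true"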
#
# General overview:
# talk to the vhost
# get back list of instances
# add each of their hostnames to an ad-hoc group
# halt each of them in a second play
# wait for them to die
# third play, reboot the vhost
# wait for vhost to come back
# TODO: Figure out how to compare virt info pre and post boot.

- name: find instances
  hosts: "{{ target }}"
  gather_facts: False
  user: root

  tasks:
  - name: get list of guests
    virt: command=list_vms state=running
    register: vmlist

#  - name: get info on guests (prereboot)
#    virt: command=info
#    register: vminfo_pre

  - name: add them to myvms_new group
    local_action: add_host hostname={{ item }} groupname=myvms_new
    with_items: "{{ vmlist.list_vms }}"

# Call out to another playbook. Disable any proxies that may live here
- import_playbook: update-proxy-dns.yml
  vars:
    status: disable
    proxies: myvms_new:&proxies
  when: nodns is not defined or not "true" in nodns

- name: halt instances
  hosts: myvms_new
  user: root
  gather_facts: False
  serial: 1

  tasks:
#  - name: delegate to os-control01 for ocp4 cluster
#    set_fact:
#      os_delegate_via: os-control01
#    when: inventory_hostname in groups['ocp']
#
#  - name: delegate to os-control01 for ocp4 cluster (stg)
#    set_fact:
#      os_delegate_via: os-control01.stg
#    when: inventory_hostname in groups['ocp_stg']
#
#  - name: drain OS node if necessary
#    command: oc adm drain {{ inventory_hostname }} --ignore-daemonsets --delete-local-data
#    delegate_to: "{{ os_delegate_via }}{{ env_suffix }}.iad2.fedoraproject.org"
#    when: inventory_hostname.startswith(('ocp', 'worker')) and hostvars[inventory_hostname].datacenter == 'iad2'
#
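  # Schedule nagios downtime before shutting the guests down so host-down
  # alerts do not fire; ignore_errors keeps the play going if noc01 is unreachable.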
  - name: schedule regular host downtime
    nagios: action=downtime minutes=30 service=host host={{ inventory_hostname_short }}{{ env_suffix }}
    delegate_to: noc01.iad2.fedoraproject.org
    ignore_errors: true
    when: nonagios is not defined or not nonagios

  - name: shutdown vms
    virt: command=shutdown name={{ inventory_hostname }}
    ignore_errors: true
    delegate_to: "{{ target }}"

- name: wait for the whole set to die.
  hosts: myvms_new
  gather_facts: False
  user: root

  tasks:
  - name: wait for them to die
    local_action: wait_for port=22 delay=30 timeout=300 state=stopped host={{ inventory_hostname }}
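
  # state=stopped makes wait_for block until port 22 stops accepting
  # connections, i.e. until sshd goes away as each guest powers off.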

- name: reboot vhost
  hosts: "{{ target }}"
  gather_facts: False
  user: root

  tasks:
  - name: tell nagios to shush
    nagios: action=downtime minutes=60 service=host host={{ inventory_hostname_short }}{{ env_suffix }}
    delegate_to: noc01.iad2.fedoraproject.org
    ignore_errors: true
    when: nonagios is not defined or not nonagios

  - name: reboot the virthost
    reboot:
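
  # The reboot module blocks until the host is reachable over ssh again
  # (600 second timeout by default), so no separate wait_for is needed here.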

  - name: wait for libvirtd to come back on the virthost
    wait_for: path=/run/libvirt/libvirt-sock state=present
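
  # Once the libvirt control socket exists, libvirtd is accepting connections
  # and the list_vms call below can succeed.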

  - name: look up vmlist
    virt: command=list_vms
    register: newvmlist

  - name: add them to myvms_postreboot group
    local_action: add_host hostname={{ item }} groupname=myvms_postreboot
    with_items: "{{ newvmlist.list_vms }}"

#  - name: sync time
#    command: ntpdate -u 1.rhel.pool.ntp.org

  - name: tell nagios to unshush
    nagios: action=unsilence service=host host={{ inventory_hostname_short }}{{ env_suffix }}
    delegate_to: noc01.iad2.fedoraproject.org
    ignore_errors: true
    when: nonagios is not defined or not nonagios

- name: post reboot tasks
  hosts: myvms_postreboot
  user: root
  gather_facts: False
  serial: 1
  vars_files:
  - /srv/web/infra/ansible/vars/global.yml
  - /srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml

  tasks:
  - name: delegate to os-control01 for ocp4 cluster
    set_fact:
      os_delegate_via: os-control01
    when: inventory_hostname in groups['ocp']

  - name: delegate to os-control01 for ocp4 cluster (stg)
    set_fact:
      os_delegate_via: os-control01.stg
    when: inventory_hostname in groups['ocp_stg']

  - name: Add back to openshift
    command: oc adm uncordon {{ inventory_hostname }}
    delegate_to: "{{ os_delegate_via }}{{ env_suffix }}.iad2.fedoraproject.org"
    when: inventory_hostname.startswith(('ocp', 'worker')) and hostvars[inventory_hostname].datacenter == 'iad2'

  - name: restart gssproxy if we rebooted an ipa server
    service: name=gssproxy state=restarted
    when: inventory_hostname.startswith('ipa')

  - name: restart rabbitmq if we rebooted a rabbit server
    service: name=rabbitmq-server state=restarted
    when: inventory_hostname.startswith('rabbitmq')

# Call out to that dns playbook. Put proxies back in now that they're back
- import_playbook: update-proxy-dns.yml
  vars:
    status: enable
    proxies: myvms_new:&proxies
  when: nodns is not defined or not "true" in nodns

#  - name: get info on guests (postreboot)
#    virt: command=info
#    register: vminfo_post