---
#
# This playbook lets you safely reboot a virthost and all its guests.
#
# requires --extra-vars="target=somevhost fqdn"
# Might add nodns=true or nonagios=true to the extra vars
#
# General overview:
#  talk to the vhost
#  get back list of instances
#  add each of their hostnames to an ad hoc group
#  halt each of them in a second play
#  wait for them to die
#  third play, reboot the vhost
#  wait for vhost to come back
#
# The nodns / nonagios switches are normalised with the `bool` filter so that
# both real booleans and the strings produced by key=value extra vars
# ("true", "false", "yes", ...) are interpreted the same way in every play.
#
# TODO: Figure out how to compare virt info pre and post boot.

# Play 1: ask the virthost which guests are running and collect them in the
# in-memory group "myvms_new" so later plays can target them.
- name: find instances
  hosts: "{{ target }}"
  gather_facts: false
  user: root

  tasks:
    - name: get list of guests
      virt:
        command: list_vms
        state: running
      register: vmlist

    # - name: get info on guests (prereboot)
    #   virt: command=info
    #   register: vminfo_pre

    - name: add them to myvms_new group
      local_action:
        module: add_host
        hostname: "{{ item }}"
        groupname: myvms_new
      with_items: "{{ vmlist.list_vms }}"

# Call out to another playbook. Disable any proxies that may live here
# NOTE(review): the original passed `status: enable` here, identical to the
# re-enable call at the end of this file, which contradicts the comment above.
# `disable` assumes update-proxy-dns.yml uses an enable/disable vocabulary --
# confirm against that playbook.
- import_playbook: update-proxy-dns.yml
  vars:
    status: disable
    # Quoted: the host pattern contains `&`, which is a YAML anchor sigil.
    proxies: "myvms_new:&proxies"
  when: nodns is not defined or not (nodns | bool)

# Play 2: one guest at a time, schedule nagios downtime and ask the virthost
# to shut the guest down.
- name: halt instances
  hosts: myvms_new
  user: root
  gather_facts: false
  serial: 1

  tasks:
    # - name: delegate to on-control01 for ocp4 cluster
    #   set_fact:
    #     os_delegate_via: os-control01
    #   when: inventory_hostname in groups['ocp']
    #
    # - name: delegate to on-control01 for ocp4 cluster (stg)
    #   set_fact:
    #     os_delegate_via: os-control01.stg
    #   when: inventory_hostname in groups['ocp_stg']
    #
    # - name: drain OS node if necessary
    #   command: oc adm drain {{inventory_hostname }} --ignore-daemonsets --delete-local-data
    #   delegate_to: "{{os_delegate_via}}{{env_suffix}}.iad2.fedoraproject.org"
    #   when: inventory_hostname.startswith(('ocp', 'worker')) and hostvars[inventory_hostname].datacenter == 'iad2'
    #
    - name: schedule regular host downtime
      nagios:
        action: downtime
        minutes: 30
        service: host
        host: "{{ inventory_hostname_short }}{{ env_suffix }}"
      delegate_to: noc01.iad2.fedoraproject.org
      # Best effort: a nagios failure must not block the reboot.
      ignore_errors: true
      when: nonagios is not defined or not (nonagios | bool)

    - name: shutdown vms
      virt:
        command: shutdown
        name: "{{ inventory_hostname }}"
      ignore_errors: true
      # The shutdown is issued on the virthost, not on the guest itself.
      delegate_to: "{{ target }}"

# Play 3: from the control host, wait until ssh (port 22) on every guest has
# stopped answering.
- name: wait for the whole set to die.
  hosts: myvms_new
  gather_facts: false
  user: root

  tasks:
    - name: wait for them to die
      local_action:
        module: wait_for
        port: 22
        delay: 30
        timeout: 300
        state: stopped
        host: "{{ inventory_hostname }}"

# Play 4: silence nagios for the virthost, reboot it, wait for the libvirt
# socket, and collect the guests it reports afterwards in the in-memory group
# "myvms_postreboot".
- name: reboot vhost
  hosts: "{{ target }}"
  gather_facts: false
  user: root

  tasks:
    - name: tell nagios to shush
      nagios:
        action: downtime
        minutes: 60
        service: host
        host: "{{ inventory_hostname_short }}{{ env_suffix }}"
      delegate_to: noc01.iad2.fedoraproject.org
      ignore_errors: true
      when: nonagios is not defined or not (nonagios | bool)

    - name: reboot the virthost
      reboot:

    - name: wait for libvirtd to come back on the virthost
      wait_for:
        path: /run/libvirt/libvirt-sock
        state: present

    # No state filter here, unlike the pre-reboot listing.
    - name: look up vmlist
      virt:
        command: list_vms
      register: newvmlist

    - name: add them to myvms_postreboot group
      local_action:
        module: add_host
        hostname: "{{ item }}"
        groupname: myvms_postreboot
      with_items: "{{ newvmlist.list_vms }}"

    # - name: sync time
    #   command: ntpdate -u 1.rhel.pool.ntp.org

    - name: tell nagios to unshush
      nagios:
        action: unsilence
        service: host
        host: "{{ inventory_hostname_short }}{{ env_suffix }}"
      delegate_to: noc01.iad2.fedoraproject.org
      ignore_errors: true
      when: nonagios is not defined or not (nonagios | bool)

# Play 5: per-guest follow-up work after the virthost is back, one guest at a
# time.
- name: post reboot tasks
  hosts: myvms_postreboot
  user: root
  gather_facts: false
  serial: 1

  tasks:
    - name: delegate to on-control01 for ocp4 cluster
      set_fact:
        os_delegate_via: os-control01
      when: inventory_hostname in groups['ocp']

    - name: delegate to on-control01 for ocp4 cluster (stg)
      set_fact:
        os_delegate_via: os-control01.stg
      when: inventory_hostname in groups['ocp_stg']

    - name: Add back to openshift
      command: oc adm uncordon {{inventory_hostname}}
      delegate_to: "{{os_delegate_via}}{{env_suffix}}.iad2.fedoraproject.org"
      # List entries are ANDed, matching the original single expression.
      when:
        - inventory_hostname.startswith(('ocp', 'worker'))
        - hostvars[inventory_hostname].datacenter == 'iad2'

    - name: restart gssproxy if we rebooted a ipa server
      service:
        name: gssproxy
        state: restarted
      when: inventory_hostname.startswith('ipa')

    - name: restart rabbitmq if we rebooted a rabbit server
      service:
        name: rabbitmq-server
        state: restarted
      when: inventory_hostname.startswith('rabbitmq')

# Call out to that dns playbook. Put proxies back in now that they're back
- import_playbook: update-proxy-dns.yml
  vars:
    status: enable
    proxies: "myvms_new:&proxies"
  when: nodns is not defined or not (nodns | bool)
  # NOTE(review): vars_files is kept exactly where the original had it, as a
  # key of this import_playbook entry. Confirm that import_playbook accepts
  # vars_files, and that ansible_distribution is available here given every
  # play in this file sets gather_facts: false.
  vars_files:
    - /srv/web/infra/ansible/vars/global.yml
    - "/srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml"

# - name: get info on guests (postreboot)
#   virt: command=info
#   register: vminfo_post