From 9e02be6a5afe1f8f450f5a473fd55b023120bf22 Mon Sep 17 00:00:00 2001
From: Pavel Raiskup
Date: Mon, 25 May 2020 11:03:58 +0200
Subject: [PATCH] copr-be: automatically cleanup VMs (not)started by resalloc

For some reason, starting a VM sometimes fails and the VM is then left
idling in our list of VMs, while we are still charged for it.  This script
periodically checks the list of started VMs and terminates those we are
not interested in.

Besides python3, the script also requires python3-dateutil.
---
 .../backend/files/cleanup-vms-aws-resalloc | 130 ++++++++++++++++++
 roles/copr/backend/tasks/main.yml          |  10 ++
 2 files changed, 140 insertions(+)
 create mode 100644 roles/copr/backend/files/cleanup-vms-aws-resalloc

diff --git a/roles/copr/backend/files/cleanup-vms-aws-resalloc b/roles/copr/backend/files/cleanup-vms-aws-resalloc
new file mode 100644
index 0000000000..accd1a1ac3
--- /dev/null
+++ b/roles/copr/backend/files/cleanup-vms-aws-resalloc
@@ -0,0 +1,130 @@
+#! /usr/bin/python3
+
+"""
+Cleanup all AWS VM instances which are using the 'copr-builder' key and are
+not valid.  An instance is not valid when either it has no Name tag set (some
+spawning problems...), or it is not tracked by the resalloc server.
+"""
+
+import json
+import logging
+import os
+import subprocess
+import time
+
+import dateutil.parser
+
+logging.basicConfig(level=logging.INFO)
+LOG = logging.getLogger()
+
+# Region used both for listing and for terminating the instances.
+AWS_REGION = "us-east-1"
+
+# Instances launched less than this number of seconds ago are left alone.
+MIN_AGE = 1800
+
+
+def run_cmd(cmd):
+    """
+    Run CMD (list of arguments) and return its standard output decoded from
+    UTF-8.  Raises subprocess.CalledProcessError if the command fails.
+    """
+    return subprocess.check_output(cmd).decode("utf-8")
+
+
+def _get_instances():
+    """
+    Return the running instances using the 'copr-builder' key, as a list of
+    dicts with the ID, Name, KeyName, CoprInstance and Start keys.
+    """
+    query = (
+        "Reservations[].Instances[].{"
+        "ID:InstanceId,"
+        "Name:Tags[?Key=='Name']|[0].Value,"
+        "KeyName:KeyName,"
+        "CoprInstance:Tags[?Key=='CoprInstance']|[0].Value,"
+        "Start:LaunchTime"
+        "}"
+    )
+    aws_command = [
+        "aws", "ec2", "describe-instances",
+        "--query", query,
+        "--filters",
+        "Name=key-name,Values=copr-builder",
+        "Name=instance-state-name,Values=running",
+        #"Name=tag-key,Values=FedoraCopr,Name=tag-value,Values=copr",
+        "--output", "json",
+        "--region", AWS_REGION,
+    ]
+    return json.loads(run_cmd(aws_command))
+
+
+def _terminate_instance(instance_id):
+    """
+    Ask AWS to terminate INSTANCE_ID.  A failure is only logged, so the
+    caller can continue with the other instances.
+    """
+    cmd = [
+        "aws", "ec2", "terminate-instances",
+        "--instance-ids", instance_id,
+        "--region", AWS_REGION,
+    ]
+    if subprocess.call(cmd) != 0:
+        LOG.error("failed to terminate instance %s", instance_id)
+
+
+def _get_tracked_instances():
+    """
+    Return the set of resource names known to the resalloc server, taken from
+    the third column of the 'resalloc-maint resource-list' output.  Empty
+    output gives an empty set.  A non-empty line with less than three columns
+    raises IndexError; we rather fail than terminate VMs based on a list we
+    could not parse.
+    """
+    raw = run_cmd(["resalloc-maint", "resource-list"])
+    return {line.split()[2] for line in raw.splitlines() if line.strip()}
+
+
+def _detect_instance():
+    """ Return "devel" on the copr-be-dev host, "production" elsewhere """
+    hostname = run_cmd(["hostname"]).strip()
+    return "devel" if "copr-be-dev" in hostname else "production"
+
+
+def _main():
+    """ Terminate old enough instances that are unnamed or untracked """
+
+    # The resalloc account has a non-standard HOME directory.
+    os.environ["HOME"] = "/home/resalloc"
+
+    tracked = _get_tracked_instances()
+    copr_instance = _detect_instance()
+
+    for instance in _get_instances():
+        started = dateutil.parser.parse(instance["Start"]).timestamp()
+        if time.time() - started < MIN_AGE:
+            continue
+
+        # Unnamed instances are terminated no matter which Copr instance
+        # (devel or production) they belong to.
+        if not instance["Name"]:
+            LOG.info("shutting down unnamed instance %s", instance["ID"])
+            _terminate_instance(instance["ID"])
+            continue
+
+        if instance["CoprInstance"] != copr_instance:
+            LOG.debug("not our instance: %s (%s)",
+                      instance["Name"], instance["CoprInstance"])
+            continue
+
+        if instance["Name"] in tracked:
+            LOG.debug("tracked %s, skipped", instance["Name"])
+            continue
+
+        LOG.info("shutting down untracked instance %s (%s)",
+                 instance["Name"], instance["ID"])
+        _terminate_instance(instance["ID"])
+
+
+if __name__ == "__main__":
+    _main()
diff --git a/roles/copr/backend/tasks/main.yml b/roles/copr/backend/tasks/main.yml
index a2ea5686b4..050eff4905 100644
--- a/roles/copr/backend/tasks/main.yml
+++ b/roles/copr/backend/tasks/main.yml
@@ -26,6 +26,7 @@
       - python3-glanceclient
       - python3-neutronclient
       - python3-keystoneclient
+      - python3-dateutil
       - php-cli
       - cronolog
       - nfs-utils
@@ -278,6 +279,9 @@
 - name: install aws cleaning script
   copy: src="cleanup-vms-aws" dest=/usr/local/bin/ mode=755
 
+- name: install aws cleaning script for resalloc
+  copy: src="cleanup-vms-aws-resalloc" dest=/usr/local/bin/ mode=755
+
 - name: install cleanup-unused-vms script
   template: src="cleanup-unused-vms-from-redis" dest=/usr/local/bin/cleanup-unused-vms-from-redis mode=755
   tags:
@@ -301,6 +305,12 @@
         minute="50"
         user=copr
 
+- name: crontab for cleaning resalloc VMs
+  cron: name="cleanup resalloc AWS VMs periodically"
+        job="/usr/local/bin/cleanup-vms-aws-resalloc"
+        minute="*/10"
+        user=resalloc
+
 - name: setup monitoring
   import_tasks: "monitoring.yml"
 