copr-be: automatically cleanup VMs (not)started by resalloc

For some reason, starting a VM sometimes fails, yet the VM is kept idling
in our VMs list and we are billed for it.  This script periodically
checks the list of started VMs and terminates those which we are not
interested in.

Besides python3, the script also requires python3-dateutil.
This commit is contained in:
Pavel Raiskup 2020-05-25 11:03:58 +02:00
parent c3f0b55d47
commit 9e02be6a5a
2 changed files with 107 additions and 0 deletions

View file

@ -0,0 +1,97 @@
#! /usr/bin/python3
"""
Clean up all AWS VM instances which are using the 'copr-builder' key and are
not valid.  Either they have no Name tag set (some spawning problems...) or
they are not tracked by the resalloc server.
"""
import json
import logging
import subprocess
import time
import os
import dateutil.parser
# Configure the root logger to emit INFO and above; LOG.debug() calls in this
# script produce no output unless this level is lowered.
logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger()
def run_cmd(cmd):
    """
    Run CMD and return everything it wrote to standard output, decoded
    from UTF-8.  A non-zero exit status makes check_output() raise
    subprocess.CalledProcessError.
    """
    raw_output = subprocess.check_output(cmd)
    return str(raw_output, "utf-8")
def _get_instances():
    """
    List running EC2 instances in us-east-1 that use the 'copr-builder' key.

    Returns the parsed JSON output of 'aws ec2 describe-instances': one
    dictionary per instance with the keys ID, Name, KeyName, CoprInstance and
    Start, as selected by the --query expression below.  Name and
    CoprInstance are taken from the instance tags of the same name.
    """
    # JMESPath projection: "<output key>:<expression>" pairs, comma separated.
    fields = [
        "ID:InstanceId",
        "Name:Tags[?Key=='Name']|[0].Value",
        "KeyName:KeyName",
        "CoprInstance:Tags[?Key=='CoprInstance']|[0].Value",
        "Start:LaunchTime",
    ]
    query = "Reservations[].Instances[].{" + ",".join(fields) + "}"

    filters = [
        "Name=key-name,Values=copr-builder",
        "Name=instance-state-name,Values=running",
        # Tag-based filter kept for reference, currently disabled:
        #"Name=tag-key,Values=FedoraCopr,Name=tag-value,Values=copr",
    ]

    aws_command = ["aws", "ec2", "describe-instances", "--query", query]
    aws_command.append("--filters")
    aws_command.extend(filters)
    aws_command.extend(["--output", "json", "--region", "us-east-1"])

    listing = run_cmd(aws_command)
    return json.loads(listing)
def _terminate_instnace(instance_id):
    """
    Ask AWS to terminate the EC2 instance INSTANCE_ID.

    Termination is best-effort: a failing 'aws' command does not raise, it
    is only reported as a warning so the caller can continue with the
    remaining instances.
    """
    cmd = [
        "aws", "ec2", "terminate-instances",
        "--instance-ids", instance_id,
        # The instance IDs come from a listing done in us-east-1, so target
        # the same region explicitly instead of relying on the default one.
        "--region", "us-east-1",
    ]
    exit_status = subprocess.call(cmd)
    if exit_status != 0:
        LOG.warning("terminating instance %s failed with exit status %s",
                    instance_id, exit_status)
def _get_tracked_instances():
    """
    Return the list of resource names printed by 'resalloc-maint
    resource-list'.

    The name is taken as the third space-separated field of every output
    line.  When the command prints nothing (no resources), an empty list is
    returned.  A non-empty line with fewer than three fields still raises
    IndexError, so that a changed output format aborts the script instead of
    making tracked instances look untracked.
    """
    raw = run_cmd(["resalloc-maint", "resource-list"])
    return [
        resource.split(' ')[2]
        for resource in raw.strip().split("\n")
        # Empty output splits into a single empty "line" with no fields;
        # skip blank lines instead of failing on them.
        if resource.strip()
    ]
def _detect_instance():
    """
    Decide from the output of 'hostname' whether this machine is the "devel"
    backend (hostname contains 'copr-be-dev') or the "production" one.
    """
    this_host = run_cmd("hostname").strip()
    if "copr-be-dev" in this_host:
        return "devel"
    return "production"
def _main():
    """
    Terminate running 'copr-builder' AWS instances that are not valid.

    Instances started less than 30 minutes ago are left alone.  Older
    instances are terminated when they have no Name tag, or when they belong
    to this Copr instance (devel/production) and their Name is not among the
    resources listed by resalloc.  Every termination is logged.
    """
    # resalloc account have nonstandard HOME variable
    os.environ["HOME"] = "/home/resalloc"

    # Minimal age (seconds) before an instance is considered for cleanup.
    # NOTE(review): presumably this gives freshly spawned VMs time to get
    # tagged and registered -- confirm.
    grace_period = 1800

    # Only membership tests are done on the names, so use a set.
    tracked = set(_get_tracked_instances())
    copr_instance = _detect_instance()

    for instance in _get_instances():
        started = dateutil.parser.parse(instance["Start"]).timestamp()
        if time.time() - started < grace_period:
            continue

        if not instance["Name"]:
            LOG.info("shutting down unnamed instance %s", instance["ID"])
            _terminate_instnace(instance["ID"])
            continue

        if instance["CoprInstance"] != copr_instance:
            LOG.debug("not our instance: %s (%s)", instance["Name"], instance["CoprInstance"])
            continue

        if instance["Name"] in tracked:
            LOG.debug("tracked %s, skipped", instance["Name"])
            continue

        # Leave a trace for this destructive action, the same way the
        # unnamed-instance branch does.
        LOG.info("shutting down untracked instance %s (%s)",
                 instance["Name"], instance["ID"])
        _terminate_instnace(instance["ID"])


# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()

View file

@ -26,6 +26,7 @@
- python3-glanceclient
- python3-neutronclient
- python3-keystoneclient
- python3-dateutil
- php-cli
- cronolog
- nfs-utils
@ -278,6 +279,9 @@
- name: install aws cleaning script
copy: src="cleanup-vms-aws" dest=/usr/local/bin/ mode=755
- name: install aws cleaning script for resalloc
copy: src="cleanup-vms-aws-resalloc" dest=/usr/local/bin/ mode=755
- name: install cleanup-unused-vms script
template: src="cleanup-unused-vms-from-redis" dest=/usr/local/bin/cleanup-unused-vms-from-redis mode=755
tags:
@ -301,6 +305,12 @@
minute="50"
user=copr
- name: crontab for cleaning resalloc VMs
cron: name="cleanup nova VMs periodically"
job="/usr/local/bin/cleanup-vms-aws-resalloc"
minute="*/10"
user=resalloc
- name: setup monitoring
import_tasks: "monitoring.yml"