copr-backend: delete orphaned AWS instances via resalloc-aws-list
This commit is contained in:
parent
9e2172db47
commit
422a344d1b
4 changed files with 4 additions and 240 deletions
|
@ -1,134 +0,0 @@
|
||||||
#! /bin/bash
|
|
||||||
|
|
||||||
cat <<EOF
|
|
||||||
This script doesn't work currently, it is obsoleted by
|
|
||||||
the script 'cleanup-vms-aws-resalloc'. It is kept for documentation purposes or
|
|
||||||
future reference.
|
|
||||||
EOF
|
|
||||||
exit 1
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
help_output()
{
    # Print the usage text to stderr.  The here-document delimiter is
    # unquoted, so $0 expands to the name the script was invoked with.
    cat <<EOF >&2
Usage: $0 [--kill-also-unused]

This script terminates all VMs started in AWS which are "probably" started by
this instance of Copr, but are no more relevant for us.

Normal mode (no --kill-also-unused) only terminates VMs which
- are in running state,
- are named properly (e.g. copr-builder-<instance>....)
- and are not in our redis database.
Such leftover VM usually occurs by manual spawning of bulider, during image
creation. But bug in VM spawner can cause this, etc. That's why we run this
mode periodically in cron job.

The --kill-also-unused mode is useful when we need to re-distribute new
mock/rpmbuild configuration to all builders. All unused builders are
terminated (so we don't kill running builds) and respawned.
EOF
}
|
|
||||||
|
|
||||||
die ()
{
    # Print the given message and an empty separator line on stderr, show
    # the usage text, and abort the script with exit status 1.
    {
        echo "$*"
        echo
    } >&2
    help_output
    exit 1
}
|
|
||||||
|
|
||||||
# UID 0 (root) is rejected; the script is meant to run as the copr user.
if [ "$UID" = "0" ]; then
    die "execute as copr user"
fi

# Set to true by --kill-also-unused.
kill_unused=false

# Walk over all command-line arguments; anything unrecognized is fatal.
for arg in "$@"; do
    if [[ $arg == "-h" || $arg == "--help" ]]; then
        help_output
        exit 1
    elif [[ $arg == "--kill-also-unused" ]]; then
        kill_unused=true
    else
        die "unknown arg $arg"
    fi
done

something_found=false
|
|
||||||
|
|
||||||
dump_command ()
{
    # Echo the command line to stderr (prefixed with " -> "), then run it
    # unchanged; the function's exit status is the command's exit status.
    printf ' -> %s\n' "$*" >&2
    "$@"
}
|
|
||||||
|
|
||||||
tracked()
{
    # Succeed when redis has a key "copr:backend:vm_instance:hset::<name>"
    # for the VM name given as $1, i.e. when `redis-cli --scan` prints
    # at least one matching key.
    #
    # The scratch variable is local so that calling this function does not
    # leave a stray global "name" behind in the caller's environment.
    local matching_keys
    matching_keys=$(redis-cli --scan --pattern "copr:backend:vm_instance:hset::$1")
    test -n "$matching_keys"
}
|
|
||||||
|
|
||||||
used()
{
    # Succeed when the redis hash of the VM named $1 has a non-empty
    # "task_id" field.
    #
    # The scratch variable is local so that calling this function does not
    # leave a stray global "task" behind in the caller's environment.
    local task_id
    task_id=$(redis-cli hget "copr:backend:vm_instance:hset::$1" task_id)
    test -n "$task_id"
}
|
|
||||||
|
|
||||||
old_enough()
{
    # Succeed when the timestamp in $1 (any format accepted by
    # `date --date`) lies at least one hour in the past.
    #
    # Returns non-zero for a missing or unparsable timestamp.

    # GNU date treats an empty date string as "beginning of today", which
    # would usually look old enough and make the VM eligible for
    # termination.  Refuse to judge a VM whose launch time is unknown.
    [ -n "$1" ] || return 1

    # Keep the scratch variables local; previously they leaked into the
    # global scope, one of them under the same name as this function.
    local launched_at now cutoff

    launched_at=$(date --date="$1" +%s) || return 1
    now=$(date +%s)

    # give them 1 hour
    cutoff=$(( now - 3600 ))
    test "$launched_at" -le "$cutoff"
}
|
|
||||||
|
|
||||||
# Command that lists the running builder VMs, one per line of text output.
# The --query projection yields the keys Id, Name (value of the "Name" tag)
# and Time (launch time) for every instance matching the filters.
aws_command=(aws ec2 describe-instances)
aws_command+=(--query "Reservations[].Instances[].{Id:InstanceId,Name:Tags[?Key=='Name']|[0].Value,Time:LaunchTime}")
aws_command+=(
    --filters
    "Name=tag:FedoraGroup,Values=copr"
    "Name=instance-state-name,Values=running"
    "Name=tag:CoprPurpose,Values=builder"
)
aws_command+=(--output text)

something_found=false

# VM names carry "prod" or "dev"; hosts whose name starts with "copr-be."
# count as production, everything else as devel.
if [[ $(hostname) == copr-be.* ]]; then
    prefix=prod
else
    prefix=dev
fi
|
|
||||||
|
|
||||||
# Go through every listed VM and terminate those that are no longer wanted.
while read -r aws_id vm_name launch_time; do
    # Only VMs named like ours (aws_*_normal_<prefix>_*) are considered.
    [[ $vm_name == aws_*_normal_${prefix}_* ]] || continue

    something_found=true

    if tracked "$vm_name"; then
        # Known VMs are skipped in normal mode; with --kill-also-unused
        # they are skipped only while they have a task assigned.
        if ! $kill_unused || used "$vm_name"; then
            continue
        fi
    fi

    # In normal mode, leave recently started VMs alone.
    if ! $kill_unused; then
        if ! old_enough "$launch_time"; then
            echo >&2 "$vm_name is not yet old enough: $launch_time"
            continue
        fi
    fi

    # Everything that got here is terminated.
    dump_command aws ec2 terminate-instances --instance-ids "$aws_id"
done < <( "${aws_command[@]}" )

# Exit with failure when no matching VM was seen at all (weird situation).
$something_found
|
|
|
@ -1,92 +0,0 @@
|
||||||
#! /usr/bin/python3
|
|
||||||
|
|
||||||
"""
|
|
||||||
Cleanup all AWS VM instances which are using 'copr-builder' key, and are not
|
|
||||||
valid. Either they have no Name tag set (some spawning problems...) or are not
|
|
||||||
tracked by resalloc server.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
|
|
||||||
import dateutil.parser
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
|
||||||
LOG = logging.getLogger()
|
|
||||||
|
|
||||||
|
|
||||||
def run_cmd(cmd):
    """Run ``cmd`` and return its standard output decoded as UTF-8.

    ``subprocess.check_output`` raises ``CalledProcessError`` when the
    command exits with a non-zero status.
    """
    raw_output = subprocess.check_output(cmd)
    return raw_output.decode("utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
def _get_instances():
    """List running EC2 instances in us-east-1 using the 'copr-builder' key.

    Runs ``aws ec2 describe-instances`` through ``run_cmd()`` and returns the
    parsed JSON output.  The ``--query`` projection gives every entry the
    keys ``ID``, ``Name``, ``KeyName``, ``CoprInstance`` and ``Start``.
    """
    projected_fields = [
        "ID:InstanceId",
        "Name:Tags[?Key=='Name']|[0].Value",
        "KeyName:KeyName",
        "CoprInstance:Tags[?Key=='CoprInstance']|[0].Value",
        "Start:LaunchTime",
    ]
    query = "Reservations[].Instances[].{" + ",".join(projected_fields) + "}"

    filters = [
        "Name=key-name,Values=copr-builder",
        "Name=instance-state-name,Values=running",
    ]

    aws_command = ["aws", "ec2", "describe-instances", "--query", query]
    aws_command.append("--filters")
    aws_command.extend(filters)
    aws_command += ["--output", "json", "--region", "us-east-1"]

    listing = run_cmd(aws_command)
    return json.loads(listing)
|
|
||||||
|
|
||||||
|
|
||||||
def _terminate_instnace(instance_id):
    """Terminate the EC2 instance ``instance_id`` through the aws CLI.

    The region is pinned to us-east-1, the same region ``_get_instances()``
    lists from, so termination does not depend on the CLI's default region.
    A failing command does not raise; it is logged so that a cron run
    leaves a trace of instances that could not be removed.
    """
    cmd = [
        "aws", "ec2", "terminate-instances",
        "--instance-ids", instance_id,
        "--region", "us-east-1",
    ]
    exit_status = subprocess.call(cmd)
    if exit_status != 0:
        LOG.error("terminating instance %s failed with exit status %s",
                  instance_id, exit_status)
|
|
||||||
|
|
||||||
|
|
||||||
def _get_tracked_instances():
    """Return the names of resources reported by ``resalloc-maint resource-list``.

    The name is taken as the third space-separated field of every output
    line.  Blank lines (including completely empty output, which used to
    raise ``IndexError``) are skipped.  A non-blank line with fewer than
    three fields still raises ``IndexError`` on purpose: guessing there
    could make a tracked VM look untracked and get it terminated.
    """
    raw = run_cmd(["resalloc-maint", "resource-list"])
    return [
        resource.split(' ')[2]
        for resource in raw.strip().split("\n")
        if resource.strip()
    ]
|
|
||||||
|
|
||||||
|
|
||||||
def _detect_instance():
    """Return "devel" if the host name contains "copr-be-dev", else "production".

    The host name is the output of the ``hostname`` command.
    """
    hostname = run_cmd("hostname").strip()
    if "copr-be-dev" in hostname:
        return "devel"
    return "production"
|
|
||||||
|
|
||||||
|
|
||||||
def _main():
    """Terminate running AWS instances that resalloc does not track.

    For every instance returned by ``_get_instances()``:

    - instances started less than 30 minutes ago are left alone,
    - instances without a Name tag are terminated,
    - instances whose CoprInstance tag differs from ours are skipped,
    - instances whose name is tracked by resalloc are skipped,
    - everything else is terminated.
    """
    # Minimum age in seconds before an instance is considered at all;
    # presumably gives freshly spawned VMs time to get tagged and
    # registered -- confirm with the spawner.
    min_age = 1800

    # A set makes the per-instance membership test O(1).
    tracked = set(_get_tracked_instances())
    copr_instance = _detect_instance()

    for instance in _get_instances():
        started = dateutil.parser.parse(instance["Start"]).timestamp()
        if time.time() - started < min_age:
            continue

        if not instance["Name"]:
            LOG.info("shutting down unnamed instance %s", instance["ID"])
            _terminate_instnace(instance["ID"])
            continue

        if instance["CoprInstance"] != copr_instance:
            LOG.debug("not our instance: %s (%s)", instance["Name"], instance["CoprInstance"])
            continue

        if instance["Name"] in tracked:
            LOG.debug("tracked %s, skipped", instance["Name"])
            continue

        # Log every termination so the cron log shows what was removed.
        LOG.info("shutting down untracked instance %s (%s)",
                 instance["Name"], instance["ID"])
        _terminate_instnace(instance["ID"])
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
_main()
|
|
|
@ -304,12 +304,6 @@
|
||||||
- redis # TODO: .service in copr-backend should depend on redis
|
- redis # TODO: .service in copr-backend should depend on redis
|
||||||
- "{{ copr_backend_target }}"
|
- "{{ copr_backend_target }}"
|
||||||
|
|
||||||
- name: install aws cleaning script
|
|
||||||
copy: src="cleanup-vms-aws" dest=/usr/local/bin/ mode=755
|
|
||||||
|
|
||||||
- name: install aws cleaning script for resalloc
|
|
||||||
copy: src="cleanup-vms-aws-resalloc" dest=/usr/local/bin/ mode=755
|
|
||||||
|
|
||||||
- name: access.redhat.com offline token file
|
- name: access.redhat.com offline token file
|
||||||
set_fact: "rhn_offline_token_file=/var/lib/resallocserver/.access.redhat.com-copr-team"
|
set_fact: "rhn_offline_token_file=/var/lib/resallocserver/.access.redhat.com-copr-team"
|
||||||
tags:
|
tags:
|
||||||
|
@ -343,13 +337,6 @@
|
||||||
user=copr
|
user=copr
|
||||||
state=absent
|
state=absent
|
||||||
|
|
||||||
- name: setup crontab for VMs
|
|
||||||
cron: name="cleanup AWS VMs periodically"
|
|
||||||
job="/usr/local/bin/cleanup-vms-aws"
|
|
||||||
minute="0"
|
|
||||||
user=copr
|
|
||||||
state=absent
|
|
||||||
|
|
||||||
- name: setup crontab for cleaning up redis
|
- name: setup crontab for cleaning up redis
|
||||||
cron: name="prune redis VM db periodically"
|
cron: name="prune redis VM db periodically"
|
||||||
job="/usr/local/bin/cleanup-unused-vms-from-redis &>> /var/log/copr-backend/cleanup-redis-vms.log"
|
job="/usr/local/bin/cleanup-unused-vms-from-redis &>> /var/log/copr-backend/cleanup-redis-vms.log"
|
||||||
|
@ -359,9 +346,10 @@
|
||||||
|
|
||||||
- name: crontab for cleaning resalloc VMs
|
- name: crontab for cleaning resalloc VMs
|
||||||
cron: name="cleanup nova VMs periodically"
|
cron: name="cleanup nova VMs periodically"
|
||||||
job="/usr/local/bin/cleanup-vms-aws-resalloc &>> /var/log/resallocserver/cron-cleanup-vms-aws.log"
|
job="true /usr/local/bin/cleanup-vms-aws-resalloc &>> /var/log/resallocserver/cron-cleanup-vms-aws.log"
|
||||||
minute="*/10"
|
minute="*/10"
|
||||||
user=resalloc
|
user=resalloc
|
||||||
|
state=absent
|
||||||
|
|
||||||
- name: crontab for cleaning-up unused subscriptions
|
- name: crontab for cleaning-up unused subscriptions
|
||||||
cron: name="cleanup unused Red Hat subscribed systems"
|
cron: name="cleanup unused Red Hat subscribed systems"
|
||||||
|
|
|
@ -30,6 +30,7 @@ aws_x86_64_{% if spot %}spot{% else %}normal{% endif %}_{% if devel %}dev{% else
|
||||||
cmd_delete: "/var/lib/resallocserver/resalloc_provision/vm-delete"
|
cmd_delete: "/var/lib/resallocserver/resalloc_provision/vm-delete"
|
||||||
cmd_livecheck: "resalloc-check-vm-ip"
|
cmd_livecheck: "resalloc-check-vm-ip"
|
||||||
cmd_release: "/var/lib/resallocserver/resalloc_provision/vm-release"
|
cmd_release: "/var/lib/resallocserver/resalloc_provision/vm-release"
|
||||||
|
cmd_list: resalloc-aws-list
|
||||||
livecheck_period: 180
|
livecheck_period: 180
|
||||||
reuse_opportunity_time: 180
|
reuse_opportunity_time: 180
|
||||||
reuse_max_count: 8
|
reuse_max_count: 8
|
||||||
|
@ -54,6 +55,7 @@ aws_aarch64_{% if spot %}spot{% else %}normal{% endif %}_{% if devel %}dev{% els
|
||||||
cmd_delete: "/var/lib/resallocserver/resalloc_provision/vm-delete"
|
cmd_delete: "/var/lib/resallocserver/resalloc_provision/vm-delete"
|
||||||
cmd_livecheck: "resalloc-check-vm-ip"
|
cmd_livecheck: "resalloc-check-vm-ip"
|
||||||
cmd_release: "/var/lib/resallocserver/resalloc_provision/vm-release"
|
cmd_release: "/var/lib/resallocserver/resalloc_provision/vm-release"
|
||||||
|
cmd_list: resalloc-aws-list
|
||||||
livecheck_period: 180
|
livecheck_period: 180
|
||||||
reuse_opportunity_time: 180
|
reuse_opportunity_time: 180
|
||||||
reuse_max_count: 8
|
reuse_max_count: 8
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue