copr-backend: delete orphaned AWS instances via resalloc-aws-list
parent 9e2172db47
commit 422a344d1b
4 changed files with 4 additions and 240 deletions

@@ -1,134 +0,0 @@
#! /bin/bash

cat <<EOF
This script is currently non-functional; it is obsoleted by the script
'cleanup-vms-aws-resalloc'.  It is kept for documentation purposes or future
reference.
EOF
exit 1

set -e

help_output()
{
    cat >&2 <<EOF
Usage: $0 [--kill-also-unused]

This script terminates all VMs started in AWS which were "probably" started by
this instance of Copr but are no longer relevant for us.

Normal mode (no --kill-also-unused) only terminates VMs which
- are in the running state,
- are named properly (e.g. copr-builder-<instance>....),
- and are not in our redis database.
Such a leftover VM usually appears after a builder is spawned manually, e.g.
during image creation, but a bug in the VM spawner can cause it as well.
That's why we run this mode periodically from a cron job.

The --kill-also-unused mode is useful when we need to re-distribute a new
mock/rpmbuild configuration to all builders.  All unused builders are
terminated (so we don't kill running builds) and respawned.
EOF
}

die ()
{
    echo >&2 "$*"
    echo >&2
    help_output
    exit 1
}

test "$UID" != "0" || die "execute as copr user"

kill_unused=false

for arg; do
    case $arg in
        -h|--help)
            help_output
            exit 1
            ;;
        --kill-also-unused)
            kill_unused=true
            ;;
        *)
            die "unknown arg $arg"
            ;;
    esac
done

something_found=false

dump_command ()
{
    echo >&2 " -> $*"
    "$@"
}

tracked()
{
    name=$(redis-cli --scan --pattern "copr:backend:vm_instance:hset::$1")
    test -n "$name"
}

used()
{
    task=$(redis-cli hget "copr:backend:vm_instance:hset::$1" task_id)
    test -n "$task"
}

old_enough()
{
    # give them 1 hour
    started=$(date --date="$1" +%s)
    now=$(date +%s)
    old_enough=$(( now - 3600 ))
    test "$started" -le "$old_enough"
}

aws_command=(
    aws ec2 describe-instances
    --query "Reservations[].Instances[].{Id:InstanceId,Name:Tags[?Key=='Name']|[0].Value,Time:LaunchTime}"
    --filters "Name=tag:FedoraGroup,Values=copr"
              "Name=instance-state-name,Values=running"
              "Name=tag:CoprPurpose,Values=builder"
    --output text
)

something_found=false

prefix=dev
case $(hostname) in
    copr-be.*)
        prefix=prod
        ;;
esac

while read -r aws_id vm_name launch_time; do
    case $vm_name in
        aws_*_normal_${prefix}_*)
            something_found=true

            if tracked "$vm_name"; then
                # skip known VMs
                ! $kill_unused && continue
                used "$vm_name" && continue
            fi

            # skip recently started VMs
            if ! $kill_unused && ! old_enough "$launch_time"; then
                echo >&2 "$vm_name is not yet old enough: $launch_time"
                continue
            fi

            # delete the rest
            dump_command aws ec2 terminate-instances --instance-ids "$aws_id"
            ;;
        *)
            continue ;;
    esac
done < <( "${aws_command[@]}" )

# fail if no VM was found (weird situation)
$something_found
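The termination policy described in the help text above boils down to: terminate running, properly named builder VMs that are unknown to the backend's redis database, unless they were launched less than an hour ago; with --kill-also-unused, also terminate known builders that are currently idle. A minimal sketch of that decision logic, re-expressed with the redis-py client (an assumption, the script itself shells out to redis-cli against the backend's local redis), looks roughly like this:

    # Illustration only: the orphan-detection policy of the deleted
    # cleanup-vms-aws script, rewritten with redis-py (an assumption; the
    # original uses redis-cli) against the backend's VM hash keys.
    import time
    import redis

    r = redis.Redis(decode_responses=True)

    def tracked(vm_name):
        # the backend keeps one hash per VM it knows about
        return r.exists(f"copr:backend:vm_instance:hset::{vm_name}") > 0

    def used(vm_name):
        # a VM with task_id set is currently running a build
        return bool(r.hget(f"copr:backend:vm_instance:hset::{vm_name}", "task_id"))

    def old_enough(launched, now=None):
        # give freshly started VMs one hour to register themselves
        now = time.time() if now is None else now
        return launched <= now - 3600

    def should_terminate(vm_name, launched, kill_unused=False):
        if tracked(vm_name):
            if not kill_unused:
                return False   # known VM, normal mode keeps it
            if used(vm_name):
                return False   # never kill a VM with a running build
        if not kill_unused and not old_enough(launched):
            return False       # too young, may still be registering
        return True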
@@ -1,92 +0,0 @@
#! /usr/bin/python3

"""
Clean up all AWS VM instances which use the 'copr-builder' key and are not
valid: either they have no Name tag set (some spawning problem...), or they
are not tracked by the resalloc server.
"""

import json
import logging
import subprocess
import time

import dateutil.parser

logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger()


def run_cmd(cmd):
    """ check_output() and decode from utf8 """
    return subprocess.check_output(cmd).decode("utf-8")


def _get_instances():
    query = (
        "Reservations[].Instances[].{"
        "ID:InstanceId,"
        "Name:Tags[?Key=='Name']|[0].Value,"
        "KeyName:KeyName,"
        "CoprInstance:Tags[?Key=='CoprInstance']|[0].Value,"
        "Start:LaunchTime"
        "}"
    )
    aws_command = [
        "aws", "ec2", "describe-instances",
        "--query", query,
        "--filters",
        "Name=key-name,Values=copr-builder",
        "Name=instance-state-name,Values=running",
        #"Name=tag-key,Values=FedoraCopr,Name=tag-value,Values=copr",
        "--output", "json",
        "--region", "us-east-1",
    ]
    return json.loads(run_cmd(aws_command))


def _terminate_instance(instance_id):
    cmd = ["aws", "ec2", "terminate-instances", "--instance-ids", instance_id]
    subprocess.call(cmd)


def _get_tracked_instances():
    raw = run_cmd(["resalloc-maint", "resource-list"])
    return_tracked = []
    for resource in raw.strip().split("\n"):
        return_tracked.append(resource.split(' ')[2])
    return return_tracked


def _detect_instance():
    hostname = run_cmd("hostname").strip()
    return "devel" if "copr-be-dev" in hostname else "production"


def _main():
    tracked = _get_tracked_instances()
    copr_instance = _detect_instance()

    for instance in _get_instances():
        started = dateutil.parser.parse(instance["Start"]).timestamp()
        if time.time() - started < 1800:
            continue

        if not instance["Name"]:
            LOG.info("shutting down unnamed instance %s", instance["ID"])
            _terminate_instance(instance["ID"])
            continue

        if instance["CoprInstance"] != copr_instance:
            LOG.debug("not our instance: %s (%s)", instance["Name"], instance["CoprInstance"])
            continue

        if instance["Name"] in tracked:
            LOG.debug("tracked %s, skipped", instance["Name"])
            continue

        _terminate_instance(instance["ID"])


if __name__ == "__main__":
    _main()
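The deleted resalloc cleanup script above builds the instance list by shelling out to the aws CLI with a JMESPath --query. For clarity, roughly the same listing can be expressed with boto3; this is only a sketch under the assumption that boto3 is available (the script itself deliberately used the CLI), with the filters and tag names taken from the code above:

    # Sketch: a boto3 equivalent of _get_instances() above.  boto3 is an
    # assumption here; the deleted script used "aws ec2 describe-instances"
    # with a JMESPath query instead.
    import boto3

    def list_copr_builder_instances(region="us-east-1"):
        ec2 = boto3.client("ec2", region_name=region)
        pages = ec2.get_paginator("describe_instances").paginate(Filters=[
            {"Name": "key-name", "Values": ["copr-builder"]},
            {"Name": "instance-state-name", "Values": ["running"]},
        ])
        instances = []
        for page in pages:
            for reservation in page["Reservations"]:
                for vm in reservation["Instances"]:
                    tags = {t["Key"]: t["Value"] for t in vm.get("Tags", [])}
                    instances.append({
                        "ID": vm["InstanceId"],
                        "Name": tags.get("Name"),
                        "KeyName": vm.get("KeyName"),
                        "CoprInstance": tags.get("CoprInstance"),
                        # boto3 returns LaunchTime as a datetime already,
                        # so no dateutil parsing is needed
                        "Start": vm["LaunchTime"],
                    })
        return instances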
@@ -304,12 +304,6 @@
    - redis # TODO: .service in copr-backend should depend on redis
    - "{{ copr_backend_target }}"

- name: install aws cleaning script
  copy: src="cleanup-vms-aws" dest=/usr/local/bin/ mode=755

- name: install aws cleaning script for resalloc
  copy: src="cleanup-vms-aws-resalloc" dest=/usr/local/bin/ mode=755

- name: access.redhat.com offline token file
  set_fact: "rhn_offline_token_file=/var/lib/resallocserver/.access.redhat.com-copr-team"
  tags:
@@ -343,13 +337,6 @@
        user=copr
        state=absent

- name: setup crontab for VMs
  cron: name="cleanup AWS VMs periodically"
        job="/usr/local/bin/cleanup-vms-aws"
        minute="0"
        user=copr
        state=absent

- name: setup crontab for cleaning up redis
  cron: name="prune redis VM db periodically"
        job="/usr/local/bin/cleanup-unused-vms-from-redis &>> /var/log/copr-backend/cleanup-redis-vms.log"
@@ -359,9 +346,10 @@

- name: crontab for cleaning resalloc VMs
  cron: name="cleanup nova VMs periodically"
        job="/usr/local/bin/cleanup-vms-aws-resalloc &>> /var/log/resallocserver/cron-cleanup-vms-aws.log"
        job="true /usr/local/bin/cleanup-vms-aws-resalloc &>> /var/log/resallocserver/cron-cleanup-vms-aws.log"
        minute="*/10"
        user=resalloc
        state=absent

- name: crontab for cleaning-up unused subscriptions
  cron: name="cleanup unused Red Hat subscribed systems"
@@ -30,6 +30,7 @@ aws_x86_64_{% if spot %}spot{% else %}normal{% endif %}_{% if devel %}dev{% else
    cmd_delete: "/var/lib/resallocserver/resalloc_provision/vm-delete"
    cmd_livecheck: "resalloc-check-vm-ip"
    cmd_release: "/var/lib/resallocserver/resalloc_provision/vm-release"
    cmd_list: resalloc-aws-list
    livecheck_period: 180
    reuse_opportunity_time: 180
    reuse_max_count: 8
@@ -54,6 +55,7 @@ aws_aarch64_{% if spot %}spot{% else %}normal{% endif %}_{% if devel %}dev{% els
    cmd_delete: "/var/lib/resallocserver/resalloc_provision/vm-delete"
    cmd_livecheck: "resalloc-check-vm-ip"
    cmd_release: "/var/lib/resallocserver/resalloc_provision/vm-release"
    cmd_list: resalloc-aws-list
    livecheck_period: 180
    reuse_opportunity_time: 180
    reuse_max_count: 8
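The cmd_list hook added to both pools above is what makes the dedicated cron cleanup scripts unnecessary: the resalloc server can periodically run it to learn which VMs actually exist in AWS and then drop resources it no longer tracks. The real implementation is resalloc-aws-list from the resalloc-aws project; the following is only an illustrative sketch, assuming the convention that such a lister prints one existing resource name per line and reusing the tag filters from the deleted bash script (consult the resalloc / resalloc-aws documentation for the actual contract):

    # Illustrative sketch only; the real resalloc-aws-list lives in the
    # resalloc-aws project.  Assumption: a cmd_list helper prints one
    # existing VM (resource) name per line so the resalloc server can
    # compare the cloud state against its own database.
    # Pagination is omitted for brevity.
    import boto3

    def main():
        ec2 = boto3.client("ec2")
        reply = ec2.describe_instances(Filters=[
            {"Name": "tag:FedoraGroup", "Values": ["copr"]},
            {"Name": "tag:CoprPurpose", "Values": ["builder"]},
            {"Name": "instance-state-name", "Values": ["running"]},
        ])
        for reservation in reply["Reservations"]:
            for vm in reservation["Instances"]:
                tags = {t["Key"]: t["Value"] for t in vm.get("Tags", [])}
                name = tags.get("Name")
                if name:
                    print(name)

    if __name__ == "__main__":
        main()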