ansible/roles/copr/backend/files/cleanup-vms-aws
Pavel Raiskup d562bde01e copr: be: more careful periodic cleanup of VMs
Never remove CoprPurpose=infrastructure, only builders.
2020-04-24 21:34:26 +02:00

127 lines
2.8 KiB
Bash
Executable file

#! /bin/bash
# Abort the script as soon as a command fails outside of a conditional context.
set -e
# Print the usage line and the description of both operating modes.
# Outputs: everything goes to stderr, nothing to stdout.
help_output() {
    {
        printf 'Usage: %s [--kill-also-unused]\n' "$0"
        # Quoted delimiter: the text below is emitted literally, no expansion.
        cat <<'EOF'
This script terminates all VMs started in AWS which are "probably" started by
this instance of Copr, but are no more relevant for us.
Normal mode (no --kill-also-unused) only terminates VMs which
- are in running state,
- are named properly (e.g. copr-builder-<instance>....)
- and are not in our redis database.
Such leftover VM usually occurs by manual spawning of bulider, during image
creation. But bug in VM spawner can cause this, etc. That's why we run this
mode periodically in cron job.
The --kill-also-unused mode is useful when we need to re-distribute new
mock/rpmbuild configuration to all builders. All unused builders are
terminated (so we don't kill running builds) and respawned.
EOF
    } >&2
}
# Report a fatal error: print the message, an empty line and the help text
# to stderr, then terminate the script.
# Arguments: $* - words of the error message (joined with spaces)
# Exits:     always, with status 1
die() {
    # printf prints the message verbatim; echo would silently swallow a
    # message that happens to look like one of its options (e.g. "-n").
    printf '%s\n\n' "$*" >&2
    help_output
    exit 1
}
# Refuse to run with UID 0; the error message asks for the copr user instead.
if [[ $UID == 0 ]]; then
    die "execute as copr user"
fi

# false: normal mode; true: --kill-also-unused was given on the command line.
kill_unused=false
for arg in "$@"; do
    if [[ $arg == -h || $arg == --help ]]; then
        # NOTE(review): asking for help exits with status 1, not 0.
        help_output
        exit 1
    elif [[ $arg == --kill-also-unused ]]; then
        kill_unused=true
    else
        die "unknown arg $arg"
    fi
done

# Flipped to true once a VM named like one of our builders is seen.
something_found=false
# Log a command line to stderr, then run it.
# Arguments: $@ - the command and its arguments
# Outputs:   " -> <command line>" on stderr, plus whatever the command prints
# Returns:   exit status of the executed command
dump_command() {
    printf ' -> %s\n' "$*" >&2
    "$@"
}
# Check whether redis holds a "copr:backend:vm_instance:hset::<name>" key
# for the given VM.
# Arguments: $1 - VM name (becomes part of a redis --scan glob pattern)
# Returns:   0 if a matching key was found, 1 if none was found
# Exits:     with status 1 when redis-cli itself fails
tracked() {
    local name
    # A failed lookup produces no output and would look like "not tracked";
    # untracked VMs get terminated, so abort instead of guessing.
    if ! name=$(redis-cli --scan --pattern "copr:backend:vm_instance:hset::$1"); then
        echo >&2 "redis-cli failed while looking up $1"
        exit 1
    fi
    test -n "$name"
}
# Check whether the VM's redis hash has a non-empty task_id field.
# Arguments: $1 - VM name
# Returns:   0 if task_id is non-empty, 1 otherwise
# Exits:     with status 1 when redis-cli itself fails
used() {
    local task
    # A failed query produces no output and would look like "no task";
    # unused VMs may get terminated, so abort instead of guessing.
    if ! task=$(redis-cli hget "copr:backend:vm_instance:hset::$1" task_id); then
        echo >&2 "redis-cli failed while reading task_id of $1"
        exit 1
    fi
    test -n "$task"
}
# Decide whether a timestamp lies far enough in the past.
# Arguments: $1 - timestamp in any format understood by `date --date`
#            $2 - minimal age in seconds (optional, defaults to 3600 = 1 hour)
# Returns:   0 if the timestamp is at least $2 seconds old; non-zero if it is
#            younger, empty, or cannot be parsed
old_enough()
{
    local launched=$1 min_age=${2:-3600} started now
    # GNU date reads an empty string as "today at midnight"; a missing
    # timestamp must never count as old.
    test -n "$launched" || return 1
    started=$(date --date="$launched" +%s) || return 1
    now=$(date +%s)
    test "$started" -le "$(( now - min_age ))"
}
# Command that lists the running AWS instances tagged as Copr builders.
# The query keeps the instance id, the value of the "Name" tag and the launch
# time of each instance; --output text prints one instance per line.
aws_command=(
    aws ec2 describe-instances
    --query "Reservations[].Instances[].{Id:InstanceId,Name:Tags[?Key=='Name']|[0].Value,Time:LaunchTime}"
    # "tag:<key>" pins a key to its value.  Separate tag-key/tag-value filters
    # are evaluated independently and would also match an instance that has
    # the key with some other value, e.g. CoprPurpose=infrastructure.
    --filters
        "Name=tag:FedoraCopr,Values=copr"
        "Name=instance-state-name,Values=running"
        "Name=tag:CoprPurpose,Values=builder"
    --output text
)
# Flipped to true once a VM named like one of our builders is seen.
something_found=false

# Name component of the builders we manage: "prod" when the hostname starts
# with "copr-be.", "dev" on any other host.
if [[ $(hostname) == copr-be.* ]]; then
    prefix=prod
else
    prefix=dev
fi
# Walk over the instances printed by the AWS query (id, name, launch time per
# line) and terminate the builders that are no longer wanted.
while read -r instance_id builder_name started_at; do
    # Instances not named like builders of this Copr instance are ignored.
    if [[ $builder_name != copr-$prefix-builder* ]]; then
        continue
    fi
    something_found=true

    if tracked "$builder_name"; then
        # Normal mode never touches a VM that is known in redis.
        if ! $kill_unused; then
            continue
        fi
        # --kill-also-unused mode still spares VMs that have a task assigned.
        if used "$builder_name"; then
            continue
        fi
    fi

    # In normal mode, recently started VMs are left alone.
    if ! $kill_unused && ! old_enough "$started_at"; then
        echo >&2 "$builder_name is not yet old enough: $started_at"
        continue
    fi

    # Whatever got here is terminated.
    dump_command aws ec2 terminate-instances --instance-ids "$instance_id"
done < <( "${aws_command[@]}" )

# The script's exit status: failure if no builder VM was seen at all
# (weird situation).
$something_found