copr: use resalloc-ibm-cloud package

can be found on https://github.com/fedora-copr/resalloc-ibm-cloud
This commit is contained in:
Jiri Kyjovsky 2023-08-22 13:48:21 +02:00 committed by praiskup
parent 87ca231739
commit 67f9873ee0
6 changed files with 65 additions and 522 deletions

View file

@ -13,6 +13,7 @@
- resalloc-openstack
- resalloc-aws
- resalloc-webui
- resalloc-ibm-cloud
- virt-install
- name: See if postgreSQL is initialized

View file

@ -1,43 +1,16 @@
#! /usr/bin/python3
#! /bin/bash
"""
List all IBM Cloud instances that are in Deleting state
"""
# Derive the IBM Cloud zone from the Resalloc resource name.  Only names
# containing "tokyo" are recognized.
case $RESALLOC_NAME in
    *tokyo*)
        zone=jp-tok
        ;;
    *)
        # Report on stderr so that stdout carries only the output of the
        # command this wrapper execs.
        echo "Can't decide what location to assign from: $RESALLOC_NAME" >&2
        exit 1
        ;;
esac
import argparse
import datetime
import os
import subprocess
import sys
from ibm_vpc import VpcV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
DEFAULT_TOKEN_FILE = "{{ ibmcloud_token_file }}"
SERVICE_URL = "https://jp-tok.iaas.cloud.ibm.com/v1"
def _get_arg_parser():
    """Build the command-line parser accepting ``--token-file`` and ``--pool``."""
    arg_parser = argparse.ArgumentParser()
    option_specs = (
        ("--token-file", {"default": DEFAULT_TOKEN_FILE}),
        ("--pool", {}),
    )
    for flag, extra in option_specs:
        arg_parser.add_argument(flag, **extra)
    return arg_parser
def _main():
    """
    Print "<id> <name>" for every IBM Cloud instance in the "deleting" state.

    The API key is read by sourcing the shell file given by ``--token-file``
    and echoing ``$IBMCLOUD_API_KEY``.
    """
    # Local import: only needed here, for quoting the token-file path.
    import shlex

    opts = _get_arg_parser().parse_args()

    # Quote the path so whitespace or shell metacharacters in --token-file
    # can neither break nor inject into the shell command.
    cmd = f"source {shlex.quote(opts.token_file)} ; echo $IBMCLOUD_API_KEY"
    output = subprocess.check_output(cmd, shell=True)
    # Sourcing the file may print other text; the key is the last output line.
    token = output.decode("utf-8").strip().rsplit("\n", maxsplit=1)[-1]

    authenticator = IAMAuthenticator(token)
    # The first VpcV1 argument is today's date formatted as YYYY-MM-DD.
    now = datetime.datetime.now()
    service = VpcV1(now.strftime('%Y-%m-%d'), authenticator=authenticator)
    service.set_service_url(SERVICE_URL)

    resp = service.list_instances()
    for server in resp.result["instances"]:
        if server["status"] == "deleting":
            print("{} {}".format(server["id"], server["name"]))
if __name__ == "__main__":
sys.exit(_main())
# Replace this shell with resalloc-ibm-cloud-list-deleting-vms, passing the
# token file, the zone chosen above and all arguments given to this script.
# NOTE(review): "{{ ibmcloud_token_file }}" looks like a template placeholder
# substituted at deploy time -- confirm.
exec resalloc-ibm-cloud-list-deleting-vms \
--token-file "{{ ibmcloud_token_file }}" \
--zone "$zone" \
"$@"

View file

@ -1,74 +1,16 @@
#! /usr/bin/python3
#! /bin/bash
import argparse
import datetime
import logging
import os
import subprocess
import sys
# Derive the IBM Cloud zone from the Resalloc resource name.  Only names
# containing "tokyo" are recognized.
case $RESALLOC_NAME in
    *tokyo*)
        zone=jp-tok
        ;;
    *)
        # Report on stderr so that stdout carries only the resource list
        # printed by the command this wrapper execs.
        echo "Can't decide what location to assign from: $RESALLOC_NAME" >&2
        exit 1
        ;;
esac
from ibm_vpc import VpcV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
DEFAULT_TOKEN_FILE = "{{ ibmcloud_token_file }}"
SERVICE_URL = "https://jp-tok.iaas.cloud.ibm.com/v1"
# Using the highest value possible
LIMIT = 100
def _get_arg_parser():
    """
    Build the command-line parser accepting ``--token-file``, ``--pool`` and
    ``--log-level`` (the latter defaults to "info").
    """
    arg_parser = argparse.ArgumentParser()
    option_specs = (
        ("--token-file", {"default": DEFAULT_TOKEN_FILE}),
        ("--pool", {}),
        ("--log-level", {"default": "info"}),
    )
    for flag, extra in option_specs:
        arg_parser.add_argument(flag, **extra)
    return arg_parser
def _main():
    """
    Print the Resalloc names of all resources belonging to one pool.

    The pool is taken from ``--pool`` or ``$RESALLOC_POOL_ID``; the script
    exits with status 1 when neither is set.  Both instances and volumes are
    inspected, so a volume left behind by a deleted instance is still
    reported under that instance's name.  Each name is printed once on
    stdout; diagnostics go through the logging module.
    """
    # Local import: only needed here, for quoting the token-file path.
    import shlex

    opts = _get_arg_parser().parse_args()
    log_level = getattr(logging, opts.log_level.upper())
    logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)
    log = logging.getLogger()

    pool_id = opts.pool or os.getenv("RESALLOC_POOL_ID")
    if not pool_id:
        sys.stderr.write("Specify pool ID by --pool or $RESALLOC_POOL_ID\n")
        sys.exit(1)

    # Quote the path so whitespace or shell metacharacters in --token-file
    # can neither break nor inject into the shell command.
    cmd = f"source {shlex.quote(opts.token_file)} ; echo $IBMCLOUD_API_KEY"
    output = subprocess.check_output(cmd, shell=True)
    # Sourcing the file may print other text; the key is the last output line.
    token = output.decode("utf-8").strip().rsplit("\n", maxsplit=1)[-1]
    authenticator = IAMAuthenticator(token)
    now = datetime.datetime.now()
    service = VpcV1(now.strftime('%Y-%m-%d'), authenticator=authenticator)
    service.set_service_url(SERVICE_URL)

    # Gather the list of all resources here
    resources = set()

    # NOTE(review): a single request of at most LIMIT items is made per
    # resource type; confirm the account never holds more than that.
    instances = service.list_instances(limit=LIMIT).result["instances"]
    for server in instances:
        # Resalloc works with underscores, which is not allowed in IBM Cloud
        name = server["name"].replace("-", "_")
        if name.startswith(pool_id):
            log.debug("found instance %s in state %s, id=%s", name, server["status"], server["id"])
            resources.add(name)

    volumes = service.list_volumes(limit=LIMIT).result["volumes"]
    for volume in volumes:
        # Resalloc works with underscores, which is not allowed in IBM Cloud
        name = volume["name"].replace("-", "_")
        if name.startswith(pool_id):
            log.debug("found volume %s in state %s, id: %s", name, volume["status"], volume["id"])
            # Drop the last "_<suffix>" component of the volume name so the
            # volume is reported under the name of its instance.
            name = name.rsplit("_", 1)[0]
            resources.add(name)

    # Print them out, so upper level tooling can work with the list
    for name in resources:
        # The only stdout output comes here!
        print(name)
if __name__ == "__main__":
sys.exit(_main())
# Replace this shell with resalloc-ibm-cloud-list-vms, passing the token file,
# the zone chosen above and all arguments given to this script.
# NOTE(review): "{{ ibmcloud_token_file }}" looks like a template placeholder
# substituted at deploy time -- confirm.
exec resalloc-ibm-cloud-list-vms \
--token-file "{{ ibmcloud_token_file }}" \
--zone "$zone" \
"$@"

441
roles/copr/backend/templates/resalloc/ibm-cloud-vm.j2 Executable file → Normal file
View file

@ -1,412 +1,39 @@
#! /usr/bin/python3
#! /bin/bash
"""
Start a new VM in IBM Cloud under the copr-team account.
"""
# Select the location-specific IBM Cloud settings from the Resalloc resource
# name.  Only names containing "tokyo" are recognized.
case $RESALLOC_NAME in
    *tokyo*)
        vpc_id=r022-8438169e-d881-4bda-b603-d31fdf0f8b3a
        security_group_id=r022-bf49b90e-c00f-4c68-8707-2936b47b286b
        ssh_key_id=r022-3918e368-8e00-4e23-9119-5e3ce1eb33bd
        instance_type=cz2-2x4
        # Space-separated "<zone>:<subnet id>" pairs.
        subnets_ids="jp-tok-1:02e7-84755ffa-97bb-4067-b618-24c788c901cb jp-tok-2:02f7-98674f68-aae1-4ea1-a889-5a0b7a07f4b8 jp-tok-3:02g7-6d92562d-b868-411f-a962-99271d609ba6"
        zone=jp-tok
        ;;
    *)
        # Report on stderr so that stdout carries only the output of the
        # command this wrapper execs.
        echo "Can't decide what location to assign from: $RESALLOC_NAME" >&2
        exit 1
        ;;
esac
import argparse
import datetime
import logging
import os
import random
import shlex
import subprocess
import sys
params=()
import requests
from ibm_vpc import VpcV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
DEFAULT_PLAYBOOK = "{{ provision_directory }}/libvirt-provision.yml"
DEFAULT_TOKEN_FILE = "{{ ibmcloud_token_file }}"
DEFAULT_IMAGE = "{{ copr_builder_images.ibm_cloud.s390x }}"
VPC_ID = "r022-8438169e-d881-4bda-b603-d31fdf0f8b3a"
SECURITY_GROUP_ID = "r022-bf49b90e-c00f-4c68-8707-2936b47b286b"
SSH_KEY_ID = "r022-3918e368-8e00-4e23-9119-5e3ce1eb33bd"
INSTANCE_TYPE = "cz2-2x4"
SERVICE_URL = "https://jp-tok.iaas.cloud.ibm.com/v1"
ZONES = {
"jp-tok-1": {
"subnet_id": "02e7-84755ffa-97bb-4067-b618-24c788c901cb",
},
"jp-tok-2": {
"subnet_id": "02f7-98674f68-aae1-4ea1-a889-5a0b7a07f4b8",
},
"jp-tok-3": {
"subnet_id": "02g7-6d92562d-b868-411f-a962-99271d609ba6",
},
}
def resalloc_to_ibmcloud_name(name):
    """
    Convert a Resalloc resource name into the form used in IBM Cloud.

    IBM Cloud doesn't like underscores in resource names, so every underscore
    is turned into a dash.  Nothing else in the name is changed.
    """
    return "-".join(name.split("_"))
def bind_floating_ip(service, instance_id, opts):
"""
Assign an existing Floating IP to given instance.
"""
log = opts.log
log.info("Bind floating IP")
response_list = service.list_floating_ips().get_result()['floating_ips']
floating_ip_uuid = None
for item in response_list:
if item["name"] != opts.floating_ip_name:
continue
if item["status"] != "available":
log.error("Floating IP %s is already used", opts.floating_ip_name)
sys.exit(1)
floating_ip_uuid = item['id']
floating_ip_address = item['address']
if floating_ip_uuid is None:
log.error("UUID for Floating IP %s not found", opts.floating_ip_name)
sys.exit(1)
network_interface_id = opts.instance_created["primary_network_interface"]["id"]
log.info("Network interface ID: %s", network_interface_id)
service.add_instance_network_interface_floating_ip(
instance_id,
network_interface_id,
floating_ip_uuid,
if [ "$1" == "create" ]; then
params+=(
--playbook "{{ provision_directory }}/libvirt-provision.yml"
--image-uuid "{{ copr_builder_images.ibm_cloud.s390x }}"
--vpc-id "$vpc_id"
--security-group-id "$security_group_id"
--ssh-key-id "$ssh_key_id"
--instance-type "$instance_type"
--subnets-ids $subnets_ids
--
)
log.info("Floating IP: %s", floating_ip_address)
return floating_ip_address
fi
def allocate_and_assign_ip(service, opts):
    """
    Allocate and assign a Floating IP to an existing machine in one call.

    POSTs directly to the ``/floating_ips`` endpoint.  The new IP is named
    after ``opts.instance_name`` and targets the primary network interface of
    ``opts.instance_created``.  The ID of the allocated IP is stored in
    ``opts.allocated_floating_ip_id``.

    Returns the allocated IP address.
    Raises RuntimeError when the API does not answer with HTTP 201.
    """
    url = SERVICE_URL + "/floating_ips"
    headers = {
        "Accept": "application/json",
        "Authorization": "Bearer " + service.authenticator.token_manager.get_token(),
    }
    params = {
        'version': "2022-01-18",
        'generation': "2",
    }
    data = {
        "name": opts.instance_name,
        "target": {
            "id": opts.instance_created["primary_network_interface"]["id"],
        },
    }
    response = requests.post(url, headers=headers, json=data, params=params)
    # Explicit check instead of `assert`, which is skipped under `python -O`.
    if response.status_code != 201:
        raise RuntimeError(
            "Floating IP allocation failed with HTTP status {}".format(
                response.status_code))
    floating_ip = response.json()
    opts.allocated_floating_ip_id = floating_ip["id"]
    return floating_ip["address"]
def assign_floating_ip(service, instance_id, opts):
    """
    Give the instance a Floating IP and return the address.

    When ``opts.floating_ip_name`` is set, the existing IP of that name is
    bound to the instance; otherwise a new IP is allocated and assigned.
    """
    if not opts.floating_ip_name:
        return allocate_and_assign_ip(service, opts)
    return bind_floating_ip(service, instance_id, opts)
def run_playbook(host, opts):
    """
    Run ``opts.playbook`` with ansible-playbook against a single host.

    The trailing comma makes the ``--inventory`` value a one-item host list.
    The playbook's stdout is redirected to our stderr.  A non-zero exit status
    raises ``subprocess.CalledProcessError``.
    """
    inventory = f"{host},"
    subprocess.check_call(
        ["ansible-playbook", opts.playbook, "--inventory", inventory],
        stdout=sys.stderr,
    )
def create_instance(service, instance_name, opts):
    """
    Start a VM named ``instance_name`` and print its IP address on stdout.

    Steps: create the instance (with a root volume and a 168 "-swap" volume,
    both deleted together with the instance), assign a Floating IP, wait for
    SSH, and run the provisioning playbook.  The API response is stored in
    ``opts.instance_created``.

    If anything fails after the create request succeeded, the instance is
    removed again and the original exception is re-raised.
    """
    log = opts.log

    volume_profile = {"name": "general-purpose"}
    boot_attachment = {
        "volume": {
            "name": instance_name + "-root",
            "profile": {
                "name": "general-purpose",
            },
        },
        "delete_volume_on_instance_delete": True,
    }
    swap_attachment = {
        "volume": {
            "name": instance_name + "-swap",
            "capacity": 168,
            "profile": volume_profile,
        },
        "delete_volume_on_instance_delete": True,
    }
    network_interface = {
        'name': 'primary-network-interface',
        'subnet': {
            "id": ZONES[opts.zone]["subnet_id"],
        },
        "security_groups": [
            {"id": SECURITY_GROUP_ID},
        ],
    }
    prototype = {
        "keys": [{"id": SSH_KEY_ID}],
        "name": instance_name,
        "profile": {"name": INSTANCE_TYPE},
        "vpc": {
            "id": VPC_ID,
        },
        "boot_volume_attachment": boot_attachment,
        "image": {"id": opts.image_uuid},
        "primary_network_interface": network_interface,
        "zone": {
            "name": opts.zone,
        },
        "volume_attachments": [swap_attachment],
    }

    # Stays None until the create request has succeeded; only then is there
    # something to clean up on failure.
    created_name = None
    opts.allocated_floating_ip_id = None
    try:
        response = service.create_instance(prototype)
        created_name = instance_name
        opts.instance_created = response.get_result()
        log.debug("Instance response: %s", response)
        log.debug("Instance response[result]: %s", opts.instance_created)
        instance_id = opts.instance_created["id"]
        log.info("Instance ID: %s", instance_id)
        address = assign_floating_ip(service, instance_id, opts)
        _wait_for_ssh(address)
        run_playbook(address, opts)
        # Tell the Resalloc clients how to connect to this instance.
        print(address)
    except BaseException:
        # Intentionally catches everything (including SystemExit raised by
        # helpers), so a half-created machine is never left behind.
        if created_name:
            log.info("Removing the failed machine")
            delete_instance(service, instance_name, opts)
        raise
def delete_all_ips(service, opts):
    """
    Delete every Floating IP whose status is "available".

    IPs in any other status are left alone.  ``opts`` is accepted for
    signature parity with the other sub-command handlers and is not used.
    """
    floating_ips = service.list_floating_ips().get_result()['floating_ips']
    for floating_ip in floating_ips:
        if floating_ip["status"] == "available":
            service.delete_floating_ip(floating_ip["id"])
def delete_instance(service, instance_name, opts):
    """
    Repeatedly try to remove the instance, to minimize the chances for
    leftovers.

    Up to five attempts are made.  This is best effort: each failed attempt
    is logged with its traceback, and if every attempt fails an error is
    logged and the function returns normally.
    """
    max_attempts = 5
    for _ in range(max_attempts):
        try:
            delete_instance_attempt(service, instance_name, opts)
            return
        except Exception:
            # Deliberately broad, as any API failure should trigger a retry,
            # but KeyboardInterrupt/SystemExit must still be able to stop us.
            opts.log.exception("Attempt to delete instance failed")
    opts.log.error("Giving up deleting instance %s after %s attempts",
                   instance_name, max_attempts)
def delete_instance_attempt(service, instance_name, opts):
    """
    Make one attempt to delete an instance by its name.

    Deletes the instance named exactly ``instance_name`` and the Floating IP
    whose name starts with ``instance_name``, when they exist.  Only when no
    such instance exists are volumes whose names start with ``instance_name``
    deleted explicitly.

    Raises RuntimeError when the instance or Floating IP deletion is not
    answered with HTTP 204, and re-raises the last exception caught while
    deleting volumes, so that the caller can retry.
    """
    log = opts.log
    log.info("Searching for instance %s", instance_name)

    delete_instance_id = None
    response_list = service.list_instances().get_result()['instances']
    for item in response_list:
        if instance_name == item['name']:
            log.debug("Found instance: %s %s %s", item['id'], item['name'], item['status'])
            delete_instance_id = item['id']

    floating_ip_id = None
    response_list = service.list_floating_ips().get_result()['floating_ips']
    for floating_ip in response_list:
        if floating_ip["name"].startswith(instance_name):
            log.debug("Found floating IP %s", floating_ip["name"])
            floating_ip_id = floating_ip["id"]

    if delete_instance_id:
        log.info("Deleting instance %s: %s", delete_instance_id, instance_name)
        resp = service.delete_instance(delete_instance_id)
        # Explicit check instead of `assert`, which is skipped under -O.
        if resp.status_code != 204:
            raise RuntimeError(
                "Can't delete instance {}, response status: {}".format(
                    delete_instance_id, resp.status_code))
        log.debug("Delete instance request delivered")

    if floating_ip_id:
        log.info("Deleting Floating IP %s", floating_ip_id)
        resp = service.delete_floating_ip(floating_ip_id)
        if resp.status_code != 204:
            raise RuntimeError(
                "Can't delete Floating IP {}, response status: {}".format(
                    floating_ip_id, resp.status_code))
        log.debug("Delete IP request delivered")

    # The volumes should be always automatically deleted together with the
    # instance that they are attached to.  So normally, it would be a waste of
    # effort to try to remove volumes explicitly here (we just requested
    # an instance removal, that one is just being removed and the volumes are
    # in a 'deleting' state at best).  Any additional request for volume
    # removal would anyway end up with an exception and an HTTP error 409.
    if delete_instance_id:
        return

    # In cloud-buggy situations, the volume stays around even when the
    # instance is correctly deleted.  Such an orphaned volume is later
    # detected by Resalloc's `cmd_list` option (it calls the script
    # ibm-cloud-list-vms to detect) and then the `ibm-cloud-vm delete` command
    # is called periodically by Resalloc, even for a non-existing instance
    # (=> delete_instance_id is None).  In that case it is correct to try our
    # best to explicitly remove the volume.
    volume_ids = []
    volumes = service.list_volumes(limit=100).result["volumes"]
    for volume in volumes:
        if not volume["name"].startswith(instance_name):
            continue
        log.debug("Found volume %s %s %s", volume["name"], volume["status"],
                  volume["id"])
        volume_ids.append(volume["id"])

    # hack: we raise the last caught exception only
    raised_exception = None
    for volume_id in volume_ids:
        log.info("Explicitly deleting volume %s", volume_id)
        try:
            resp = service.delete_volume(volume_id)
        except Exception as exc:
            log.error("Exception raised while deleting volume %s",
                      volume_id)
            raised_exception = exc
            continue  # Trying the rest of volumes
        if resp.status_code != 204:
            log.error("Can't delete volume %s, response status: %s",
                      volume_id, resp.status_code)

    if raised_exception is not None:
        # The exception will cause a re-try (re-run of
        # delete_instance_attempt()).
        raise raised_exception
def _get_arg_parser():
    """
    Build the command-line parser.

    Global options are ``--token-file`` and ``--log-level``.  One of these
    sub-commands is required (stored in ``opts.subparser``):

    - ``create NAME`` with ``--playbook``, ``--image-uuid``,
      ``--floating-ip-name`` and ``--zones``
    - ``delete NAME``
    - ``delete-free-floating-ips``
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--token-file", default=DEFAULT_TOKEN_FILE)
    parser.add_argument("--log-level", default="info")
    subparsers = parser.add_subparsers(dest='subparser')
    subparsers.required = True

    parser_create = subparsers.add_parser(
        "create", help="Create an instance in IBM Cloud")
    parser_create.add_argument("name")
    parser_create.add_argument("--playbook", default=DEFAULT_PLAYBOOK)
    parser_create.add_argument("--image-uuid", default=DEFAULT_IMAGE)
    parser_create.add_argument("--floating-ip-name", default=None)
    zones_default = ",".join(ZONES)
    parser_create.add_argument("--zones", help=(
        "A comma separated list of zone names. Default='{default}'. "
        "The script starts the VM randomly in one of the "
        "specified zones.".format(default=zones_default)),
        default=zones_default)

    parser_delete = subparsers.add_parser(
        "delete", help="Delete instance by its name from IBM Cloud")
    parser_delete.add_argument("name")

    # This sub-command takes no arguments, so its parser is not kept.
    subparsers.add_parser("delete-free-floating-ips",
                          help="Clean all IPs without an assigned VM")
    return parser
def _wait_for_ssh(floating_ip):
    """
    Block until SSH on ``floating_ip`` responds, using the external
    ``resalloc-aws-wait-for-ssh`` tool with a timeout of 240.

    The tool's stdout is redirected to our stderr.  A non-zero exit status
    raises ``subprocess.CalledProcessError``.
    """
    wait_cmd = ["resalloc-aws-wait-for-ssh"]
    wait_cmd += ["--log", "debug"]
    wait_cmd += ["--timeout", "240"]
    wait_cmd.append(floating_ip)
    subprocess.check_call(wait_cmd, stdout=sys.stderr)
def detect_floating_ip_name(opts):
    """
    CURRENTLY UNUSED, the IPs are deallocated

    Fill in ``opts.floating_ip_name`` unless it was set on the command line.

    We allocate Floating IPs in intervals for each instance.

    Production:
    - 000-099
    - currently we allocate at most 8-16 instances
    Devel
    - 100-199
    - currently we allocate 1 to 2 instances
    Manual starting (not via resalloc) (use --floating-ip-name copr-builder-NNN)
    - 200-201

    Since we only allocate at most 16+2+2, we have 20 IPs pre-allocated
    "forever" in the IBM Cloud API.  If you increase the numbers, go to the web
    UI and reserve more.

    The number is taken from ``$RESALLOC_ID_IN_POOL`` (plus 100 for the devel
    instance); the script exits with status 1 when the variable is unset.
    """
    # set by command line option?
    if opts.floating_ip_name:
        return

    id_in_pool = int(os.environ.get("RESALLOC_ID_IN_POOL", -1))
    if id_in_pool == -1:
        opts.log.error("Please specify --floating-ip-name, or RESALLOC_ID_IN_POOL")
        sys.exit(1)

    if opts.instance == "devel":
        id_in_pool += 100

    opts.floating_ip_name = "copr-builder-{}".format(str(id_in_pool).zfill(3))
    # The name already carries the "copr-builder-" prefix; don't repeat it.
    opts.log.info("Using Floating IP %s", opts.floating_ip_name)
def _main():
    """
    Entry point: parse arguments, authenticate and run the sub-command.

    The API key is read by sourcing ``--token-file`` in a shell and echoing
    ``$IBMCLOUD_API_KEY``.  For sub-commands that take a name, the name is
    converted to its IBM Cloud form and the instance is classified as
    "production" when the name contains "-prod-", otherwise as "devel".
    """
    opts = _get_arg_parser().parse_args()
    level = getattr(logging, opts.log_level.upper())
    logging.basicConfig(format='%(levelname)s: %(message)s', level=level)
    opts.log = logging.getLogger()

    shell_cmd = "source {} ; echo $IBMCLOUD_API_KEY".format(
        shlex.quote(opts.token_file))
    raw_output = subprocess.check_output(shell_cmd, shell=True)
    # Sourcing the file may print other text; the key is the last output line.
    api_key = raw_output.decode("utf-8").strip().rsplit("\n", maxsplit=1)[-1]

    authenticator = IAMAuthenticator(api_key)
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    service = VpcV1(today, authenticator=authenticator)
    # We work with Tokyo only for now.
    service.set_service_url(SERVICE_URL)

    if hasattr(opts, "name"):
        name = resalloc_to_ibmcloud_name(opts.name)
        opts.instance_name = name
        if "-prod-" in name:
            opts.instance = "production"
        else:
            opts.instance = "devel"

    action = opts.subparser
    if action == "create":
        # Start the VM in a randomly chosen zone out of the allowed ones.
        candidates = [zone.strip() for zone in opts.zones.split(",")]
        opts.zone = random.choice(candidates)
        # detect_floating_ip_name(opts)
        create_instance(service, name, opts)
        return
    if action == "delete":
        delete_instance(service, name, opts)
        return
    if action == "delete-free-floating-ips":
        delete_all_ips(service, opts)
if __name__ == "__main__":
_main()
# vi: ft=python
# Replace this shell with resalloc-ibm-cloud-vm.  Global options (token file,
# zone, debug logging) come first, then the sub-command given as $1, then the
# extra options collected in the params array (filled only for "create"), and
# finally the remaining arguments of this script.
exec resalloc-ibm-cloud-vm \
--token-file "{{ ibmcloud_token_file }}" \
--zone "$zone" \
--log-level debug \
"$1" \
"${params[@]}" \
"${@:2}"

View file

@ -186,7 +186,7 @@ copr_ibm_cloud_s390x_tokyo_{% if devel %}dev{% else %}prod{% endif %}:
{% endif %}
- arch_s390x
- arch_s390x_native
cmd_new: '/var/lib/resallocserver/resalloc_provision/ibm-cloud-vm --log-level debug create "$RESALLOC_NAME"'
cmd_new: '/var/lib/resallocserver/resalloc_provision/ibm-cloud-vm create "$RESALLOC_NAME"'
cmd_delete: "/var/lib/resallocserver/resalloc_provision/vm-delete"
cmd_livecheck: "resalloc-check-vm-ip"
cmd_release: "/var/lib/resallocserver/resalloc_provision/vm-release"

View file

@ -27,7 +27,7 @@ copr_osuosl_p09_*)
;;
*s390x*)
# Delegate the rest of the task to the IBM Cloud specific script.
exec /var/lib/resallocserver/resalloc_provision/ibm-cloud-vm --log-level debug delete "$RESALLOC_NAME"
exec /var/lib/resallocserver/resalloc_provision/ibm-cloud-vm delete "$RESALLOC_NAME"
exit 1
;;
*copr_hv_x86_64_*)