diff --git a/roles/copr/backend/tasks/resalloc.yml b/roles/copr/backend/tasks/resalloc.yml index 5cea9fea52..91e848be51 100644 --- a/roles/copr/backend/tasks/resalloc.yml +++ b/roles/copr/backend/tasks/resalloc.yml @@ -13,6 +13,7 @@ - resalloc-openstack - resalloc-aws - resalloc-webui + - resalloc-ibm-cloud - virt-install - name: See if postgreSQL is initialized diff --git a/roles/copr/backend/templates/resalloc/ibm-cloud-list-deleting-vms.j2 b/roles/copr/backend/templates/resalloc/ibm-cloud-list-deleting-vms.j2 index cf3fa11219..22d3bf45bb 100644 --- a/roles/copr/backend/templates/resalloc/ibm-cloud-list-deleting-vms.j2 +++ b/roles/copr/backend/templates/resalloc/ibm-cloud-list-deleting-vms.j2 @@ -1,43 +1,16 @@ -#! /usr/bin/python3 +#! /bin/bash -""" -List all IBM Cloud instances that are in Deleting state -""" +case $RESALLOC_NAME in + *tokyo*) + zone=jp-tok + ;; + *) + echo "Can't decide what location to assign from: $RESALLOC_NAME" + exit 1 + ;; +esac -import argparse -import datetime -import os -import subprocess -import sys - -from ibm_vpc import VpcV1 -from ibm_cloud_sdk_core.authenticators import IAMAuthenticator - -DEFAULT_TOKEN_FILE = "{{ ibmcloud_token_file }}" -SERVICE_URL = "https://jp-tok.iaas.cloud.ibm.com/v1" - - -def _get_arg_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("--token-file", default=DEFAULT_TOKEN_FILE) - parser.add_argument("--pool") - return parser - - -def _main(): - opts = _get_arg_parser().parse_args() - cmd = f"source {opts.token_file} ; echo $IBMCLOUD_API_KEY" - output = subprocess.check_output(cmd, shell=True) - token = output.decode("utf-8").strip().rsplit("\n", maxsplit=1)[-1] - authenticator = IAMAuthenticator(token) - now = datetime.datetime.now() - service = VpcV1(now.strftime('%Y-%m-%d'), authenticator=authenticator) - service.set_service_url(SERVICE_URL) - resp = service.list_instances() - for server in resp.result["instances"]: - # Resalloc works with underscores, which is not allowed in IBM Cloud - if server["status"] == "deleting": - print("{} {}".format(server["id"], server["name"])) - -if __name__ == "__main__": - sys.exit(_main()) +exec resalloc-ibm-cloud-list-deleting-vms \ + --token-file "{{ ibmcloud_token_file }}" \ + --zone "$zone" \ + "$@" diff --git a/roles/copr/backend/templates/resalloc/ibm-cloud-list-vms.j2 b/roles/copr/backend/templates/resalloc/ibm-cloud-list-vms.j2 old mode 100755 new mode 100644 index fe306f47c6..92a159377a --- a/roles/copr/backend/templates/resalloc/ibm-cloud-list-vms.j2 +++ b/roles/copr/backend/templates/resalloc/ibm-cloud-list-vms.j2 @@ -1,74 +1,16 @@ -#! /usr/bin/python3 +#! /bin/bash -import argparse -import datetime -import logging -import os -import subprocess -import sys +case $RESALLOC_NAME in + *tokyo*) + zone=jp-tok + ;; + *) + echo "Can't decide what location to assign from: $RESALLOC_NAME" + exit 1 + ;; +esac -from ibm_vpc import VpcV1 -from ibm_cloud_sdk_core.authenticators import IAMAuthenticator - -DEFAULT_TOKEN_FILE = "{{ ibmcloud_token_file }}" -SERVICE_URL = "https://jp-tok.iaas.cloud.ibm.com/v1" - -# Using the highest value possible -LIMIT = 100 - - -def _get_arg_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("--token-file", default=DEFAULT_TOKEN_FILE) - parser.add_argument("--pool") - parser.add_argument("--log-level", default="info") - return parser - - -def _main(): - opts = _get_arg_parser().parse_args() - log_level = getattr(logging, opts.log_level.upper()) - logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level) - log = logging.getLogger() - - pool_id = opts.pool or os.getenv("RESALLOC_POOL_ID") - if not pool_id: - sys.stderr.write("Specify pool ID by --pool or $RESALLOC_POOL_ID\n") - sys.exit(1) - - cmd = f"source {opts.token_file} ; echo $IBMCLOUD_API_KEY" - output = subprocess.check_output(cmd, shell=True) - token = output.decode("utf-8").strip().rsplit("\n", maxsplit=1)[-1] - authenticator = IAMAuthenticator(token) - now = datetime.datetime.now() - service = VpcV1(now.strftime('%Y-%m-%d'), authenticator=authenticator) - service.set_service_url(SERVICE_URL) - - # Gather the list of all resources here - resources = set() - - instances = service.list_instances(limit=LIMIT).result["instances"] - for server in instances: - # Resalloc works with underscores, which is not allowed in IBM Cloud - name = server["name"].replace("-", "_") - if name.startswith(pool_id): - log.debug("found instance %s in state %s, id=%s", name, server["status"], server["id"]) - resources.add(name) - - volumes = service.list_volumes(limit=LIMIT).result["volumes"] - for volume in volumes: - # Resalloc works with underscores, which is not allowed in IBM Cloud - name = volume["name"].replace("-", "_") - if name.startswith(pool_id): - log.debug("found volume %s in state %s, id: %s", name, volume["status"], volume["id"]) - name = name.rsplit("_", 1)[0] - resources.add(name) - - # Print them out, so upper level tooling can work with the list - for name in resources: - # The only stdout output comes here! - print(name) - - -if __name__ == "__main__": - sys.exit(_main()) +exec resalloc-ibm-cloud-list-vms \ + --token-file "{{ ibmcloud_token_file }}" \ + --zone "$zone" \ + "$@" diff --git a/roles/copr/backend/templates/resalloc/ibm-cloud-vm.j2 b/roles/copr/backend/templates/resalloc/ibm-cloud-vm.j2 old mode 100755 new mode 100644 index c0a1012ff0..5f48f5c997 --- a/roles/copr/backend/templates/resalloc/ibm-cloud-vm.j2 +++ b/roles/copr/backend/templates/resalloc/ibm-cloud-vm.j2 @@ -1,412 +1,39 @@ -#! /usr/bin/python3 +#! /bin/bash -""" -Start a new VM in IBM Cloud under the copr-team account. -""" +case $RESALLOC_NAME in + *tokyo*) + vpc_id=r022-8438169e-d881-4bda-b603-d31fdf0f8b3a + security_group_id=r022-bf49b90e-c00f-4c68-8707-2936b47b286b + ssh_key_id=r022-3918e368-8e00-4e23-9119-5e3ce1eb33bd + instance_type=cz2-2x4 + subnets_ids="jp-tok-1:02e7-84755ffa-97bb-4067-b618-24c788c901cb jp-tok-2:02f7-98674f68-aae1-4ea1-a889-5a0b7a07f4b8 jp-tok-3:02g7-6d92562d-b868-411f-a962-99271d609ba6" + zone=jp-tok + ;; + *) + echo "Can't decide what location to assign from: $RESALLOC_NAME" + exit 1 + ;; +esac -import argparse -import datetime -import logging -import os -import random -import shlex -import subprocess -import sys +params=() -import requests - -from ibm_vpc import VpcV1 -from ibm_cloud_sdk_core.authenticators import IAMAuthenticator - -DEFAULT_PLAYBOOK = "{{ provision_directory }}/libvirt-provision.yml" -DEFAULT_TOKEN_FILE = "{{ ibmcloud_token_file }}" -DEFAULT_IMAGE = "{{ copr_builder_images.ibm_cloud.s390x }}" - -VPC_ID = "r022-8438169e-d881-4bda-b603-d31fdf0f8b3a" -SECURITY_GROUP_ID = "r022-bf49b90e-c00f-4c68-8707-2936b47b286b" -SSH_KEY_ID = "r022-3918e368-8e00-4e23-9119-5e3ce1eb33bd" -INSTANCE_TYPE = "cz2-2x4" -SERVICE_URL = "https://jp-tok.iaas.cloud.ibm.com/v1" - -ZONES = { - "jp-tok-1": { - "subnet_id": "02e7-84755ffa-97bb-4067-b618-24c788c901cb", - }, - "jp-tok-2": { - "subnet_id": "02f7-98674f68-aae1-4ea1-a889-5a0b7a07f4b8", - }, - "jp-tok-3": { - "subnet_id": "02g7-6d92562d-b868-411f-a962-99271d609ba6", - }, -} - - -def resalloc_to_ibmcloud_name(name): - """ - IBM CLoud doesn't like underscores, and non-alphabetical characters at the - beginning of resource names. - """ - return name.replace("_", "-") - - -def bind_floating_ip(service, instance_id, opts): - """ - Assign an existing Floating IP to given instance. - """ - - log = opts.log - log.info("Bind floating IP") - response_list = service.list_floating_ips().get_result()['floating_ips'] - floating_ip_uuid = None - for item in response_list: - if item["name"] != opts.floating_ip_name: - continue - if item["status"] != "available": - log.error("Floating IP %s is already used", opts.floating_ip_name) - sys.exit(1) - floating_ip_uuid = item['id'] - floating_ip_address = item['address'] - - if floating_ip_uuid is None: - log.error("UUID for Floating IP %s not found", opts.floating_ip_name) - sys.exit(1) - - network_interface_id = opts.instance_created["primary_network_interface"]["id"] - log.info("Network interface ID: %s", network_interface_id) - service.add_instance_network_interface_floating_ip( - instance_id, - network_interface_id, - floating_ip_uuid, +if [ "$1" == "create" ]; then + params+=( + --playbook "{{ provision_directory }}/libvirt-provision.yml" + --image-uuid "{{ copr_builder_images.ibm_cloud.s390x }}" + --vpc-id "$vpc_id" + --security-group-id "$security_group_id" + --ssh-key-id "$ssh_key_id" + --instance-type "$instance_type" + --subnets-ids $subnets_ids + -- ) - log.info("Floating IP: %s", floating_ip_address) - return floating_ip_address +fi - -def allocate_and_assign_ip(service, opts): - """ - Allocate and assign a Floating IP to an existing machine in one call. - """ - - url = SERVICE_URL + "/floating_ips" - headers = { - "Accept": "application/json", - "Authorization": "Bearer " + service.authenticator.token_manager.get_token(), - } - params = { - 'version': "2022-01-18", - 'generation': "2", - } - data = { - "name": opts.instance_name, - "target": { - "id": opts.instance_created["primary_network_interface"]["id"], - }, - } - response = requests.post(url, headers=headers, json=data, params=params) - assert response.status_code == 201 - opts.allocated_floating_ip_id = response.json()["id"] - return response.json()["address"] - - -def assign_floating_ip(service, instance_id, opts): - """ - Assign a Floating IP address (and create it, if needed). - """ - - if opts.floating_ip_name: - return bind_floating_ip(service, instance_id, opts) - - return allocate_and_assign_ip(service, opts) - - -def run_playbook(host, opts): - """ - Run ansible-playbook against the given hostname - """ - cmd = ["ansible-playbook", opts.playbook, "--inventory", "{},".format(host)] - subprocess.check_call(cmd, stdout=sys.stderr) - - -def create_instance(service, instance_name, opts): - """ - Start the VM, name it "instance_name" - """ - - log = opts.log - - instance_prototype_model = { - "keys": [{"id": SSH_KEY_ID}], - "name": instance_name, - "profile": {"name": INSTANCE_TYPE}, - "vpc": { - "id": VPC_ID, - }, - "boot_volume_attachment": { - "volume": { - "name": instance_name + "-root", - "profile": { - "name": "general-purpose", - }, - }, - "delete_volume_on_instance_delete": True, - }, - "image": {"id": opts.image_uuid}, - "primary_network_interface": { - 'name': 'primary-network-interface', - 'subnet': { - "id": ZONES[opts.zone]["subnet_id"], - }, - "security_groups": [ - {"id": SECURITY_GROUP_ID}, - ], - }, - "zone": { - "name": opts.zone, - }, - "volume_attachments": [{ - "volume": { - "name": instance_name + "-swap", - "capacity": 168, - "profile": {"name": "general-purpose"}, - }, - "delete_volume_on_instance_delete": True, - }], - } - - ip_address = None - instance_created = None - opts.allocated_floating_ip_id = None - try: - response = service.create_instance(instance_prototype_model) - instance_created = instance_name - opts.instance_created = response.get_result() - log.debug("Instance response: %s", response) - log.debug("Instance response[result]: %s", opts.instance_created) - instance_id = opts.instance_created["id"] - log.info("Instance ID: %s", instance_id) - ip_address = assign_floating_ip(service, instance_id, opts) - _wait_for_ssh(ip_address) - run_playbook(ip_address, opts) - # Tell the Resalloc clients how to connect to this instance. - print(ip_address) - except: - if instance_created: - log.info("Removing the failed machine") - delete_instance(service, instance_name, opts) - raise - - -def delete_all_ips(service, opts): - """ - Go through all reserved IPs, and remove all which are not assigned - to any VM - """ - response_list = service.list_floating_ips().get_result()['floating_ips'] - for fip in response_list: - if fip["status"] != "available": - continue - service.delete_floating_ip(fip["id"]) - - -def delete_instance(service, instance_name, opts): - """ - Repeatedly try to remove the instance, to minimize the chances for - leftovers. - """ - for _ in range(5): - try: - delete_instance_attempt(service, instance_name, opts) - break - except: - opts.log.exception("Attempt to delete instance failed") - pass - - -def delete_instance_attempt(service, instance_name, opts): - """ one attempt to delete instance by it's name """ - log = opts.log - log.info("Searching for instance %s", instance_name) - - delete_instance_id = None - response_list = service.list_instances().get_result()['instances'] - for item in response_list: - if instance_name == item['name']: - log.debug("Found instance: %s %s %s", item['id'], item['name'], item['status']) - delete_instance_id = item['id'] - - floating_ip_id = None - response_list = service.list_floating_ips().get_result()['floating_ips'] - for floating_ip in response_list: - if floating_ip["name"].startswith(instance_name): - log.debug("Found floating IP %s", floating_ip["name"]) - floating_ip_id = floating_ip["id"] - - if delete_instance_id: - log.info("Deleting instance %s: %s", delete_instance_id, instance_name) - resp = service.delete_instance(delete_instance_id) - assert resp.status_code == 204 - log.debug("Delete instance request delivered") - - if floating_ip_id: - log.info("Deleting Floating IP %s", floating_ip_id) - resp = service.delete_floating_ip(floating_ip_id) - assert resp.status_code == 204 - log.debug("Delete IP request delivered") - - # The volumes should be always automatically deleted together with the - # instance that they are attached to. So normally, it would be a wasting of - # efforts to try to remove volumes explicitly here (we just requested - # an instance removal, that one is just being removed and the volumes are in - # a 'deleting' state at best). Any additional request for volume removal - # would anyway end up with an exception and an HTTP error 409. - if delete_instance_id: - return - - # In cloud-buggy situations, the volume stays around even when instance ID - # is correctly deleted. Such an orphaned volume is later detected by - # Resalloc's `cmd_list` option (it calls the script ibm-cloud-list-vms to - # detect) and then `ibm-cloud-vm delete` command is called periodically by - # Resalloc, even for a non-existing instance (=> delete_instance_id==NULL). - # In such case is very correct, and we try our best, to explicitly remove - # the volume. - - volume_ids = [] - volumes = service.list_volumes(limit=100).result["volumes"] - for volume in volumes: - if not volume["name"].startswith(instance_name): - continue - - log.debug("Found volume %s %s %s", volume["name"], volume["status"], - volume["id"]) - volume_ids.append(volume["id"]) - - if volume_ids: - # hack: we raise the last caught exception only - raised_exception = None - - for volume_id in volume_ids: - log.info("Explicitly deleting volume %s", volume_id) - try: - resp = service.delete_volume(volume_id) - except Exception as exc: - log.error("Exception raised while deleting volume %s", - volume_id) - raised_exception = exc - continue # Trying the rest of volumes - - if resp.status_code != 204: - log.error("Can't delete volume %s, response status: %s", - volume_id, resp.status_code) - - if raised_exception is not None: - # The exception will cause a re-try (re-run of - # delete_instance_attempt)). - raise raised_exception - - -def _get_arg_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("--token-file", default=DEFAULT_TOKEN_FILE) - parser.add_argument("--log-level", default="info") - subparsers = parser.add_subparsers(dest='subparser') - subparsers.required = True - parser_create = subparsers.add_parser( - "create", help="Create an instance in IBM Cloud") - parser_create.add_argument("name") - parser_create.add_argument("--playbook", default=DEFAULT_PLAYBOOK) - parser_create.add_argument("--image-uuid", default=DEFAULT_IMAGE) - parser_create.add_argument("--floating-ip-name", default=None) - zones_default=",".join(ZONES.keys()) - parser_create.add_argument("--zones", help=( - "A comma separate list of zone names. Default='{default}'. " - "The script starts the VM randomly randomly in one of the " - "specified zones.".format(default=zones_default)), - default=zones_default) - parser_delete = subparsers.add_parser( - "delete", help="Delete instance by it's name from IBM Cloud") - parser_delete.add_argument("name") - ips = subparsers.add_parser("delete-free-floating-ips", - help="Clean all IPs without an assigned VM") - return parser - - -def _wait_for_ssh(floating_ip): - cmd = ["resalloc-aws-wait-for-ssh", - "--log", "debug", - "--timeout", "240", - floating_ip] - subprocess.check_call(cmd, stdout=sys.stderr) - - -def detect_floating_ip_name(opts): - """ - CURRENTLY UNUSED, the IPs are deallocated - - We allocate Floating IPS in intervals for each instance. - Production: - - 000-099 - - currently we allocate at most 8-16 instances - Devel - - 100-199 - - currently we allocate 1 to 2 instances - Manual starting (not via resalloc) (use --floating-ip-name copr-builder-NNN) - - 200-201 - Since we only allocate at most 16+2+2, we have 20 IPs pre-allocated - "forever" in the IBM Cloud API. If you increase the numbers, go to the web - UI and reserve more. - """ - - # set by command line option? - if opts.floating_ip_name: - return - - id_in_pool = int(os.environ.get("RESALLOC_ID_IN_POOL", -1)) - if id_in_pool == -1: - opts.log.error("Please specify --floating-ip-name, or RESALLOC_ID_IN_POOL") - sys.exit(1) - - if opts.instance == "devel": - id_in_pool += 100 - - opts.floating_ip_name = "copr-builder-{}".format(str(id_in_pool).zfill(3)) - opts.log.info("Using Floating IP copr-builder-%s", opts.floating_ip_name) - - -def _main(): - opts = _get_arg_parser().parse_args() - log_level = getattr(logging, opts.log_level.upper()) - logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level) - log = logging.getLogger() - opts.log = log - - cmd = "source {} ; echo $IBMCLOUD_API_KEY".format( - shlex.quote(opts.token_file)) - output = subprocess.check_output(cmd, shell=True) - token = output.decode("utf-8").strip().rsplit("\n", maxsplit=1)[-1] - authenticator = IAMAuthenticator(token) - - now = datetime.datetime.now() - service = VpcV1(now.strftime('%Y-%m-%d'), authenticator=authenticator) - - # We work with Tokyo only for now. - service.set_service_url(SERVICE_URL) - - if hasattr(opts, "name"): - name = resalloc_to_ibmcloud_name(opts.name) - opts.instance_name = name - opts.instance = "production" if "-prod-" in name else "devel" - - if opts.subparser == "create": - allowed_zones = [x.strip() for x in opts.zones.split(",")] - opts.zone = random.choice(allowed_zones) - # detect_floating_ip_name(opts) - create_instance(service, name, opts) - elif opts.subparser == "delete": - delete_instance(service, name, opts) - elif opts.subparser == "delete-free-floating-ips": - delete_all_ips(service, opts) - -if __name__ == "__main__": - _main() - -# vi: ft=python +exec resalloc-ibm-cloud-vm \ + --token-file "{{ ibmcloud_token_file }}" \ + --zone "$zone" \ + --log-level debug \ + "$1" \ + "${params[@]}" \ + "${@:2}" diff --git a/roles/copr/backend/templates/resalloc/pools.yaml b/roles/copr/backend/templates/resalloc/pools.yaml index 9cf5ad70f4..939ddbdf14 100644 --- a/roles/copr/backend/templates/resalloc/pools.yaml +++ b/roles/copr/backend/templates/resalloc/pools.yaml @@ -186,7 +186,7 @@ copr_ibm_cloud_s390x_tokyo_{% if devel %}dev{% else %}prod{% endif %}: {% endif %} - arch_s390x - arch_s390x_native - cmd_new: '/var/lib/resallocserver/resalloc_provision/ibm-cloud-vm --log-level debug create "$RESALLOC_NAME"' + cmd_new: '/var/lib/resallocserver/resalloc_provision/ibm-cloud-vm create "$RESALLOC_NAME"' cmd_delete: "/var/lib/resallocserver/resalloc_provision/vm-delete" cmd_livecheck: "resalloc-check-vm-ip" cmd_release: "/var/lib/resallocserver/resalloc_provision/vm-release" diff --git a/roles/copr/backend/templates/resalloc/vm-delete.j2 b/roles/copr/backend/templates/resalloc/vm-delete.j2 index 90438198af..e725d8d7df 100755 --- a/roles/copr/backend/templates/resalloc/vm-delete.j2 +++ b/roles/copr/backend/templates/resalloc/vm-delete.j2 @@ -27,7 +27,7 @@ copr_osuosl_p09_*) ;; *s390x*) # Delegate the rest of the task to the IBM Cloud specific script. - exec /var/lib/resallocserver/resalloc_provision/ibm-cloud-vm --log-level debug delete "$RESALLOC_NAME" + exec /var/lib/resallocserver/resalloc_provision/ibm-cloud-vm delete "$RESALLOC_NAME" exit 1 ;; *copr_hv_x86_64_*)