copr-be: allow starting VMs on HVs

This commit is contained in:
Pavel Raiskup 2021-05-11 20:56:39 +02:00
parent e1cc610b7b
commit d0d3b8badc
3 changed files with 410 additions and 0 deletions

View file

@ -16,6 +16,8 @@ copr_builder_images:
x86_64: copr-builder-x86_64-f32-20200914_072608
ppc64le: copr-builder-ppc64le-f31-20200117_132023
aarch64: copr-builder-aarch64-f32-20200914_073754
hypervisor:
x86_64: copr-builder-20210511_184529
aws:
x86_64: ami-05655b44ed8d4f869 # copr-builder-x86_64-f33-20210119_150254
aarch64: ami-0e26990bd41c19eba # copr-builder-aarch64-f33-20210119_145252

View file

@ -30,6 +30,7 @@
mode: 0755
with_items:
- upload-qcow2-images
- libvirt-new
tags:
- provision_config
when:

View file

@ -0,0 +1,407 @@
#! /usr/bin/python3
"""
Spawn a Copr Builder using libvirt
"""
# pylint: disable=invalid-name
import argparse
import logging
import os
import pipes  # deprecated module (removed in Python 3.13); shlex.quote replaces pipes.quote
import shlex
import shutil
import subprocess
import sys
import tempfile
import time
# Name of the libvirt storage pool used as the default value of the 'pool'
# argument of the volume-handling methods below.
DEFAULT_POOL = 'images'
# Maps a builder architecture to the name of the base image volume that gets
# cloned as the VM's root disk (looked up in LibvirtSpawner.spawn()).  The value
# is a '{{ ... }}' template placeholder -- presumably rendered by the deployment
# tooling (Ansible/Jinja2) before this script is installed; verify against the
# task that installs this file.
VOLUMES = {
    'x86_64': '{{ copr_builder_images.hypervisor.x86_64 }}',
}
def get_hv_id_from_pool_id(pool_id):
    """
    Get unique ID of the hypervisor.

    The POOL_ID is expected to look like ``copr_hv_x86_64_<ID>`` optionally
    followed by an environment suffix (``_dev``, ``_prod`` or ``_stg``), e.g.
    ``copr_hv_x86_64_01_prod`` => ``01``.

    Returns the ``<ID>`` part as a string.  Raises ValueError (a subclass of
    Exception) when POOL_ID doesn't start with the expected prefix.
    """
    prefix = "copr_hv_x86_64_"
    if not pool_id.startswith(prefix):
        raise ValueError(
            f"can't convert pool_id to hv ID: {pool_id!r}")

    # Cut the prefix/suffix only at the ends of the string; str.replace()
    # would also drop matching text from the middle of the ID.
    hv_id = pool_id[len(prefix):]
    for suffix in ("_dev", "_prod", "_stg"):
        if hv_id.endswith(suffix):
            hv_id = hv_id[:-len(suffix)]
    return hv_id
class LibvirtSpawner:
    """
    Context for all the logic (to avoid working with globals).

    Expected usage: create the instance, set ``vm_name``, configure the
    networking (add_nat_network(), add_bridged_network()), call spawn(), and
    finally -- no matter whether spawn() succeeded -- call cleanup(success).
    """
    # pylint: disable=too-many-instance-attributes

    # Local temporary directory; created in __init__, removed by cleanup().
    workdir = None
    # Libvirt connection URI given to virsh and virt-install.
    connection = None
    # Name of the VM, also used as the prefix of volume names.  Must be set by
    # the caller before spawn().
    vm_name = None
    # Libvirt storage pool where spawn() creates the VM's volumes.
    root_disk_pool = "images"
    # New capacity of the cloned root volume ('virsh vol-resize --capacity').
    root_vol_size = "6GB"
    # Shell snippets collected by append_startup_script().
    startup_script = ""
    # Key into VOLUMES that selects the base image; set in __init__.
    arch = None
    # Size of the optional swap volume; no swap volume is created when falsy.
    swap_vol_size = None
    # Number of virtual CPUs ('virt-install --vcpus').
    cpu_count = 2
    # Memory size given to 'virt-install --ram'.
    ram_size = 4096
    # Additional virt-install options.  This class-level list is only a
    # default, __init__ gives every instance its own list.
    boot_options = []
    # Address used to reach the VM over SSH; set by add_bridged_network().
    ipv6 = None
    # Playbook executed against the booted VM.
    # NOTE(review): the file name mentions aarch64 while __init__ sets arch to
    # x86_64 -- confirm this is the intended playbook.
    playbook = "/home/copr/provision/_provision_aarch64.yml"
    # How many times a failing cleanup action is attempted, and the number of
    # seconds slept between the attempts.
    cleanup_attempts = 3
    cleanup_retry_sleep = 15

    def __init__(self, resalloc_pool_id, log):
        """
        Prepare the spawner for the hypervisor identified by RESALLOC_POOL_ID;
        LOG is the logger object used for all the messages.
        """
        hv_id = get_hv_id_from_pool_id(resalloc_pool_id)
        self.connection = (
            f"qemu+ssh://copr@vmhost-x86-copr{hv_id}"
            ".rdu-cc.fedoraproject.org/system")
        self.arch = "x86_64"
        self.workdir = tempfile.mkdtemp()
        self.script_path = os.path.dirname(os.path.realpath(__file__))
        self.log = log
        # name => [always, function, arg1, arg2, ...], see cleanup_action()
        self.cleanup_actions = {}
        # Per-instance list; 'self.boot_options += [...]' on the class-level
        # list would modify the list shared by all instances.
        self.boot_options = []

    def call(self, cmd, *args, **kwargs):
        """
        Run CMD, and log info.

        CMD is an argument list, the remaining arguments are passed down to
        subprocess.call().  Returns the exit status of CMD.
        """
        self.log.debug("cmd: %s", ' '.join(shlex.quote(str(x)) for x in cmd))
        start = time.time()
        status = subprocess.call(cmd, *args, **kwargs)
        self.log.debug(" -> exit_status=%s, time=%ss",
                       status, round(time.time() - start, 3))
        return status

    def virsh_silent(self, args):
        """
        Call virsh without polluting stdout.

        The stdout of virsh is redirected to our stderr.  Returns the virsh
        exit status.
        """
        return self.call(['virsh', '-c', self.connection] + args,
                         stdout=sys.stderr)

    def wait_for_ssh(self, host):
        """
        Knowing the IP address of recently started VM, wait for the SSH server
        responding on that IP.  Raises Exception when the waiting script fails.
        """
        script = "/usr/bin/wait-for-ssh"
        if self.call([script, f"root@{host}"]):
            raise Exception("waiting not successful")

    def execute_spinup_playbook(self, host, playbook):
        """
        Run given playbook against the given host.  The run is limited by
        'timeout 600'; raises Exception upon a non-zero exit status.
        """
        # The trailing comma makes ansible treat the argument as an inline
        # list of hosts, not as an inventory file name.
        cmd = ['timeout', '600', 'ansible-playbook', playbook, '-i', host + ","]
        if self.call(cmd, stdout=sys.stderr):
            raise Exception("can't spinup")

    def cleanup(self, success):
        """
        Perform cleanups (e.g. upon failure)

        The scheduled actions are executed in the sorted order of their names.
        When SUCCESS is True, only the actions scheduled with always=True are
        executed.  The working directory is removed in any case, even if some
        of the actions raises an exception.
        """
        self.log.debug("Cleaning up ...")
        try:
            for action in sorted(self.cleanup_actions):
                self.log.debug("cleanup %s", action)
                always, method, *args = self.cleanup_actions[action]
                if success and not always:
                    self.log.info("Cleanup action %s skipped", action)
                    continue
                self._run_cleanup_action(action, method, args)
        finally:
            shutil.rmtree(self.workdir)

    def _run_cleanup_action(self, action, method, args):
        """
        Call METHOD(ARGS) until it returns zero, at most 'cleanup_attempts'
        times, sleeping 'cleanup_retry_sleep' seconds between the attempts.
        """
        for attempt in range(1, self.cleanup_attempts + 1):
            if method(args) == 0:
                return
            if attempt >= self.cleanup_attempts:
                break
            self.log.debug("sleeping %ss before retry",
                           self.cleanup_retry_sleep)
            time.sleep(self.cleanup_retry_sleep)
        # give up :-(
        self.log.error("Giving up the cleanup action '%s'", action)

    def cleanup_action(self, name, function, args, always=False):
        """
        Schedule a cleanup action; when always is False, and the script
        succeeds, the action isn't executed.  When always is True, the cleanup
        action is executed no matter the script result.

        The actions are later executed as FUNCTION(ARGS) in the sorted order
        of their NAMEs; scheduling the same NAME twice replaces the action.
        """
        self.cleanup_actions[name] = [always, function] + args

    def _schedule_volume_removal(self, name, pool):
        """ Schedule removal of the volume NAME from POOL (upon failure) """
        self.cleanup_action(
            '80_delete_disk_{0}'.format(name),
            self.virsh_silent,
            ['vol-delete', name, '--pool', pool],
        )

    def alloc_disk(self, name, size, pool=DEFAULT_POOL):
        """
        Allocate disk of SIZE size in POOL

        Integer SIZE is converted to "<SIZE>G", other values are passed to
        'virsh vol-create-as' as they are.  Raises Exception when the volume
        can't be created.
        """
        if isinstance(size, int):
            size = "{0}G".format(size)
        if self.virsh_silent(['vol-create-as', pool, name, str(size)]) != 0:
            raise Exception("can't create '{0}' disk".format(name))
        self._schedule_volume_removal(name, pool)

    def append_startup_script(self, content):
        """ Add shell script contents to pre-network-script.sh """
        self.startup_script += "\n" + content + "\n"

    def unused1(self, ip):
        """ setup static IPv4 address (currently not used) """
        # NOTE(review): 'nmcli con up' documents the 'ifname' keyword; the
        # 'iface' below looks like a typo -- confirm before using this method.
        self.append_startup_script("\n".join([
            "nmcli con add con-name '{con_name}' ifname {device} "
            "type ethernet ip4 {ip}/23 gw4 38.145.49.254",
            "nmcli con mod '{con_name}' ipv4.dns '8.8.8.8,1.1.1.1'",
            "nmcli con up '{con_name}' iface {device}",
        ]).format(
            ip=ip,
            con_name="copr-static",
            device='eth0',
        ))

    def resizeroot(self, device, partition):
        """
        Resize root partition after start

        DEVICE is the disk name without the /dev/ prefix, PARTITION is the
        partition number on that disk.
        """
        dev = "/dev/{}".format(device)
        part = "/dev/{}{}".format(device, partition)
        self.append_startup_script("\n".join([
            f"growpart {dev} {partition}",
            f"resize2fs {part}",
            "mount -o remount /",
        ]))

    def generate_config_iso(self):
        """
        Generate the ISO file that is attached to the VM and used by cloud-init
        to pre-configure the box.

        Returns the path to the generated ISO, or None when there's no startup
        script to be executed.  Raises Exception when mkisofs fails.
        """
        if not self.startup_script:
            return None

        script = "#! /bin/bash\nset -e\n" + self.startup_script
        config_dir = os.path.join(self.workdir, "config")
        os.makedirs(config_dir)

        pn_script = os.path.join(config_dir, "eimg-early-script.sh")
        with open(pn_script, 'w', encoding="utf-8") as script_file:
            script_file.write(script)

        image = os.path.join(self.workdir, 'config.iso')
        # the 'eimg_config' label is important, we search for /dev/disk/by-label/...
        if self.call(['mkisofs', '-o', image, '-V', 'eimg_config', '-r', '-J',
                      '--quiet', config_dir]) != 0:
            raise Exception("mkisofs failed")
        return image

    def create_volume_from_iso(self, name, prealloc_size, iso, pool=DEFAULT_POOL):
        """
        Create libvirt volume from ISO file

        Allocates the volume NAME of PREALLOC_SIZE in POOL, and uploads the
        ISO file into it.  Raises Exception upon failure.
        """
        self.alloc_disk(name, prealloc_size, pool)
        if self.virsh_silent(['vol-upload', name, iso, '--pool', pool]):
            raise Exception("can not vol-upload the config disk")

    def create_volume_from_volume(self, name, volume, pool=DEFAULT_POOL, size=None):
        """
        Clone VOLUME as a NAME, and increase size to SIZE.

        The resize is skipped when SIZE is not given.  Raises Exception when
        cloning or resizing fails.
        """
        if self.virsh_silent(['vol-clone', volume, name, '--pool', pool]):
            raise Exception("vol-clone failed")
        self._schedule_volume_removal(name, pool)

        if not size:
            return
        if self.virsh_silent(['vol-resize', '--vol', name, '--capacity',
                              str(size), '--pool', pool]):
            raise Exception("can't resize '{0}'".format(name))

    def boot_machine(self, volumes, vcpus):
        """
        Use virt-install to start the VM according to previously given
        configuration.

        VOLUMES is a list of (volume, device, bus) triplets, VCPUS is the
        number of virtual CPUs.  Raises Exception when virt-install fails.
        """
        cmd = [
            'virt-install',
            '--connect', self.connection,
            '--ram', str(self.ram_size),
            '--os-type', 'generic',
            '--vcpus', str(vcpus),
            '--vnc',
            '--features', 'acpi=off',
            '--noautoconsole',
            '--import',
            '-n', self.vm_name,
            '--channel', "unix,target_type=virtio,name='org.qemu.guest_agent.0'",
            '--rng', '/dev/random',
            # '--boot', 'kernel_args="ds=nocloud-net"'
        ] + self.boot_options

        for vol in volumes:
            cmd += ['--disk', 'vol={0},device={1},bus={2}'.format(*vol)]

        if self.call(cmd, stdout=sys.stderr):
            raise Exception("can not boot the machine")

        # The action names sort before '80_delete_disk_*', so the VM is
        # destroyed and undefined before its volumes are removed.
        self.cleanup_action(
            '50_shut_down_vm_destroy',
            self.virsh_silent,
            ['destroy', self.vm_name],
        )
        self.cleanup_action(
            '51_shut_down_vm_undefine',
            self.virsh_silent,
            ['undefine', self.vm_name, '--nvram'],
        )

    def add_bridged_network(self, con_name, device, ipv6_addr, ipv6_gw):
        """
        Add bridged networking device, visible from the outside world.

        CON_NAME and DEVICE identify the NetworkManager connection and the
        network interface inside the VM.  IPV6_ADDR (address/prefix) and
        IPV6_GW are configured statically; the address part of IPV6_ADDR is
        remembered in self.ipv6.
        """
        self.boot_options += ['--network', 'bridge=br0,model=virtio']
        self.append_startup_script("\n".join([
            # NOTE(review): the 'ahoj' line looks like a debugging leftover
            "echo ahoj >> /var/tmp/ahoj",
            f"nmcli con add con-name '{con_name}' ifname {device} "
            "type ethernet",
            f"nmcli con modify '{con_name}' ipv6.address {ipv6_addr}",
            f"nmcli con modify '{con_name}' ipv6.gateway {ipv6_gw}",
            f"nmcli con modify '{con_name}' ipv4.method disabled",
        ]))
        self.ipv6 = ipv6_addr.split("/")[0]

    def add_nat_network(self):
        """ Start the VM with NATed network device """
        self.boot_options += ["--network", "network=default,model=virtio"]

    def spawn(self):
        """
        Spawn the machine, or raise a traceback, caller is responsible for
        calling self.cleanup().

        Creates the config, root and (optionally) swap volumes in the
        'root_disk_pool' pool, boots the VM, waits for SSH and runs the
        spin-up playbook against self.ipv6.
        """
        # Fail early, before anything is allocated on the hypervisor.
        if not self.vm_name:
            raise Exception("vm_name is not set")
        if not self.ipv6:
            raise Exception("no address to connect to, "
                            "add_bridged_network() wasn't called")

        pool = self.root_disk_pool

        config_iso = self.generate_config_iso()
        config_vol_name = None
        if config_iso:
            self.log.info("using config image %s", config_iso)
            config_vol_name = self.vm_name + "_config"
            self.create_volume_from_iso(config_vol_name, '1M', config_iso,
                                        pool=pool)

        vol_root = self.vm_name + '_root'
        self.create_volume_from_volume(
            vol_root,
            VOLUMES[self.arch],
            pool=pool,
            size=self.root_vol_size)

        # swap volume
        swap_volume = None
        if self.swap_vol_size:
            swap_volume = self.vm_name + "_swap"
            self.alloc_disk(swap_volume, self.swap_vol_size, pool=pool)

        # (volume, device, bus) triplets, the root disk goes first
        volumes = [("{}/{}".format(pool, vol_root), 'disk', 'virtio')]
        if config_vol_name:
            volumes.append(
                ("{}/{}".format(pool, config_vol_name), 'cdrom', 'scsi'))
        if swap_volume:
            volumes.append(
                ("{}/{}".format(pool, swap_volume), "disk", "virtio"))

        self.boot_machine(volumes, self.cpu_count)
        self.wait_for_ssh(self.ipv6)
        self.execute_spinup_playbook(self.ipv6, self.playbook)
def get_arg_parser():
    """
    Get the argparse object

    All the numeric options are parsed as integers, so the values have the
    same type no matter whether they come from the command-line or from the
    defaults.  The --name and --resalloc-* options default to None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--swap-vol-size', metavar='GB', type=int, default=20)
    parser.add_argument('--root-vol-size', metavar='GB', type=int)
    # Without type=int, values given on the command-line would be strings
    # while the defaults are integers.
    parser.add_argument('--cpu-count', type=int, default=2)
    parser.add_argument('--ram-size', metavar='MB', type=int, default=4096)
    parser.add_argument('--name')
    parser.add_argument('--resalloc-pool-id')
    parser.add_argument('--resalloc-id-in-pool')
    return parser
def get_fedora_ipv6_address(pool_id, id_in_pool, dev=False):
    """
    Statically assign IPv6 + Gateway based on id_in_pool.

    POOL_ID identifies the hypervisor (see get_hv_id_from_pool_id(), the ID is
    expected to be a number starting from 1), ID_IN_POOL is the number of the
    VM within the pool (integer, or a string convertible to integer).  When
    DEV is True, the address is taken from the first 8 addresses of the
    hypervisor's block, otherwise from the rest of the block.

    Returns an (address, gateway) pair of strings.
    """
    gateway = "2620:52:3:1:ffff:ffff:ffff:fffe"
    base = "2620:52:3:1:dead:beef:cafe:c1"

    # 01 => 0, 02 => 1, ...
    hv_index = int(get_hv_id_from_pool_id(pool_id)) - 1

    # we have block of 256 ipv6 addresses for builders, and 4 hypervisors, so
    # each hypervisor gets a sub-block of 64 addresses
    block = 256 // 4
    offset = hv_index * block
    if not dev:
        # the first 8 addresses of each sub-block are left for dev
        offset += 8

    addr_number = offset + int(id_in_pool)
    # Always use two hex digits.  Without the zero-padding the numbers below
    # 16 would give e.g. '...:cafe:c18' (0x0c18) instead of '...:cafe:c108',
    # which is not in the c100-c1ff block.
    return "{0}{1:02x}".format(base, addr_number), gateway
def _main():
    """
    Script entrypoint: parse the arguments, spawn the VM, and print its IPv6
    address to stdout.  The spawner cleanup is executed in any case.
    """
    logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger()
    args = get_arg_parser().parse_args()

    # Options that may alternatively be given through environment variables;
    # the script exits with status 1 when neither source provides the value.
    # NOTE(review): the remaining options defined by get_arg_parser()
    # (--swap-vol-size, --root-vol-size, --cpu-count, --ram-size) are not read
    # anywhere below -- confirm whether they should configure the spawner.
    env_fallbacks = (
        ("name", "RESALLOC_NAME"),
        ("resalloc_pool_id", "RESALLOC_POOL_ID"),
        ("resalloc_id_in_pool", "RESALLOC_ID_IN_POOL"),
    )
    for attr, env_var in env_fallbacks:
        if getattr(args, attr) is not None:
            continue
        value = os.environ.get(env_var)
        if value is None:
            log.error("Either use --%s or set %s",
                      attr.replace("_", "-"), env_var)
            sys.exit(1)
        setattr(args, attr, value)

    # NOTE(review): the 'dev' argument is never passed here, so the non-dev
    # address range is always used -- verify for the dev pools.
    ip6_a, ip6_g = get_fedora_ipv6_address(args.resalloc_pool_id,
                                           args.resalloc_id_in_pool)

    spawner = LibvirtSpawner(args.resalloc_pool_id, log)
    spawner.vm_name = args.name
    spawner.add_nat_network()
    spawner.add_bridged_network("Wired connection 2", "eth1", ip6_a, ip6_g)

    success = False
    try:
        spawner.spawn()
        # the address of the VM is the only thing written to stdout
        print(spawner.ipv6, flush=True)
        success = True
    finally:
        spawner.cleanup(success)


if __name__ == "__main__":
    _main()