openqa/worker: prepare to handle multiple tap worker classes
I'm going to try splitting the tap jobs across multiple worker hosts. We have quite a lot of tap jobs now, and I have sometimes seen a situation where all non-tap jobs have been run and the non-tap worker hosts are sitting idle, but the single tap worker host still has a long queue of tap jobs to get through. We can't just put multiple hosts per instance into the tap class, because then we might get a case where job A from a tap group is run on one host and job B from the same group is run on a different host, and they can't communicate. It's actually possible to set this up so it works, but it needs yet more complex networking that I don't want to mess with.

So instead I'm just going to split the tap job groups across two classes, 'tap' and 'tap2'. That way we can have one 'tap' worker host and one 'tap2' worker host per instance and arch, and each will get about half the tap jobs. Unfortunately, since we only have one aarch64 worker for lab, it will still have to run all the tap jobs, but in all other cases we do have at least two workers, so we can split the load.

Signed-off-by: Adam Williamson <awilliam@redhat.com>
parent 31ff414ca8
commit 28110d34be
8 changed files with 43 additions and 9 deletions
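As a rough sketch of the end result (assuming an x86_64 worker, and using the WORKER_CLASS lines from the template change below), the 'tap' host and the 'tap2' host on an instance end up with worker classes like:

# host in the 'tap' class (openqa_tap: tap)
WORKER_CLASS = tap,tpm,qemu_x86_64,qemu_i686,qemu_i586
# host in the 'tap2' class (openqa_tap: tap2)
WORKER_CLASS = tap2,tpm,qemu_x86_64,qemu_i686,qemu_i586

The tap job groups in os-autoinst-distri-fedora are then assigned to one class or the other, so each host picks up roughly half of the tap jobs.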
inventory/group_vars/openqa_tap12_workers (new file, 1 line)
@@ -0,0 +1 @@
+openqa_tap: tap,tap2

inventory/group_vars/openqa_tap1_workers (new file, 1 line)
@@ -0,0 +1 @@
+openqa_tap: tap

inventory/group_vars/openqa_tap2_workers (new file, 1 line)
@@ -0,0 +1 @@
+openqa_tap: tap2
@@ -7,4 +7,3 @@ host_group: openqa-tap-workers
nat_rules: [
# masquerade for openQA openvswitch workers to reach the outside
'-A POSTROUTING -o {{ openqa_tap_iface }} -j MASQUERADE']
-openqa_tap: true
@@ -470,7 +470,10 @@ openqa-x86-worker05.iad2.fedoraproject.org
openqa-x86-worker06.iad2.fedoraproject.org

# the workers that can run networked jobs. each server should have *one* of these per arch
[openqa_tap_workers]
# per tap worker class defined in os-autoinst-distri-fedora (currently there are two
# classes; the purpose of multiple classes is to split the load of tap jobs across
# multiple workers)
[openqa_tap1_workers]
# prod
openqa-x86-worker01.iad2.fedoraproject.org
openqa-a64-worker02.iad2.fedoraproject.org

@@ -478,6 +481,18 @@ openqa-a64-worker02.iad2.fedoraproject.org
openqa-x86-worker04.iad2.fedoraproject.org
openqa-a64-worker01.iad2.fedoraproject.org
openqa-p09-worker01.iad2.fedoraproject.org
[openqa_tap2_workers]
# prod
openqa-x86-worker02.iad2.fedoraproject.org
openqa-a64-worker03.iad2.fedoraproject.org
# lab
openqa-x86-worker05.iad2.fedoraproject.org
openqa-p09-worker02.iad2.fedoraproject.org
# these do both tap and tap2 - because we only have one aarch64 worker
# for lab, it has to handle all tap jobs
[openqa_tap12_workers]
# lab
openqa-a64-worker01.iad2.fedoraproject.org

# the workers that run createhdds to create the base disk images. Again,
# only one per arch per instance should be present.

@@ -502,6 +517,12 @@ openqa_lab_workers
openqa
openqa_lab

# common group for all tap workers
[openqa_tap_workers:children]
openqa_tap1_workers
openqa_tap2_workers
openqa_tap12_workers

[packages]

[packages_stg]
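A quick way to sanity-check the resulting layout after a change like this (the command and the inventory path are my assumption, not part of the commit) is to ask Ansible to draw the group graph for the common group:

ansible-inventory -i inventory --graph openqa_tap_workers

which should list openqa_tap1_workers, openqa_tap2_workers and openqa_tap12_workers as children of openqa_tap_workers, with the host memberships shown in the diff above.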
@@ -2,5 +2,5 @@ openqa_hostname: localhost
openqa_repo: updates
openqa_createhdds_branch: main
openqa_nfs_worker: false
-openqa_tap: false
+openqa_tap: ""
openqa_hdds_worker: false
@@ -23,9 +23,20 @@
## per deployment
## default - false
# - openqa_tap
-## bool - whether this is the tap- and swtpm-enabled host or not
-## each deployment should have *one* tap-capable worker host
-## default - false
+## string - tap worker classes this worker should be part of, a comma-
+## separated string, e.g. "tap" or "tap,tap2". If this is
+## empty, the worker will not be configured for tap and
+## swtpm at all; if it's not empty, the worker will be
+## configured for tap and swtpm and the string substituted
+## into the workers.ini config file. Only *one* worker
+## should be in each tap worker class (so there should not
+## be two workers in the "tap" class), but if you are short
+## on workers, one worker can be in *multiple* tap classes,
+## meaning it will pick up more tap jobs. The purpose of
+## having multiple tap classes is to split the load of tap
+## jobs across more than one host, when enough hosts are
+## available
+## default - empty string (disabled)

# Optional vars
# - openqa_rngd
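In practice this commit sets the new variable per inventory group rather than per host; for example, the new group_vars files added earlier set it like this (paths relative to inventory/group_vars/):

# openqa_tap1_workers - one tap class
openqa_tap: tap
# openqa_tap12_workers - both classes, for the lone lab aarch64 worker
openqa_tap: tap,tap2

A worker host that should not run tap jobs at all just keeps the default empty string.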
@@ -4,10 +4,10 @@ HOST = http://{{ openqa_hostname|default('localhost') }}
WORKER_CLASS = {{ openqa_worker_class }}
{% elif openqa_tap|bool %}
{% if ansible_architecture == 'ppc64' or ansible_architecture == 'ppc64le' %}
-WORKER_CLASS = tap,tpm,qemu_ppc64le,qemu_ppc64
+WORKER_CLASS = {{ openqa_tap }},tpm,qemu_ppc64le,qemu_ppc64
{% elif ansible_architecture == 'aarch64' %}
-WORKER_CLASS = tap,tpm,qemu_aarch64,qemu_arm
+WORKER_CLASS = {{ openqa_tap }},tpm,qemu_aarch64,qemu_arm
{% else %}
-WORKER_CLASS = tap,tpm,qemu_x86_64,qemu_i686,qemu_i586
+WORKER_CLASS = {{ openqa_tap }},tpm,qemu_x86_64,qemu_i686,qemu_i586
{% endif %}
{% endif %}
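To make the substitution concrete: for the lab aarch64 host that sits in both classes (openqa_tap: tap,tap2), the aarch64 branch of this template renders roughly as:

WORKER_CLASS = tap,tap2,tpm,qemu_aarch64,qemu_arm

so that one host can pick up jobs from both tap classes, as described in the commit message.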