From 180138b5b9793766ce8d8992563839f50004191a Mon Sep 17 00:00:00 2001 From: Adam Williamson Date: Fri, 3 Mar 2023 10:00:14 -0800 Subject: [PATCH] openQA worker hosts: switch 03 and 06, cut workers on 05 openqa-x86-worker03 seems to be a bit poorly lately; it quite often fails jobs in 'hardware blip' looking ways, even after a reboot. It's also the equal-worst hardware in the worker host pool with 05. So let's swap 03 and 06 so prod has most of the best hardware, and lab has the poorly box. Also while doing a quick hardware survey I noticed 05 is just as underpowered as 03 (it has 2x E5-2680v3, total 24 physical CPUs, all the other hosts aside from those two have 2x16 core CPUs), so this cuts its worker count to the same as 03 (and makes the comment more accurate for both). Added comments to inventory with the CPU info for each box for future reference. Signed-off-by: Adam Williamson --- .../openqa-x86-worker03.iad2.fedoraproject.org | 4 ++-- .../openqa-x86-worker05.iad2.fedoraproject.org | 6 ++++-- inventory/inventory | 14 ++++++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/inventory/host_vars/openqa-x86-worker03.iad2.fedoraproject.org b/inventory/host_vars/openqa-x86-worker03.iad2.fedoraproject.org index d15ca455aa..748605cac4 100644 --- a/inventory/host_vars/openqa-x86-worker03.iad2.fedoraproject.org +++ b/inventory/host_vars/openqa-x86-worker03.iad2.fedoraproject.org @@ -58,8 +58,8 @@ nrpe_procs_crit: 1600 nrpe_procs_warn: 1400 # Has a hardware RNG openqa_rngd: true -# this machines has 48 CPUs, 22 is our "just under half the number of -# CPUs" rule-of-thumb, but it seems to be running out of memory so +# this machine has 24 CPUs, 22 is our "just under one worker per +# CPU" rule-of-thumb, but it seems to be running out of memory so # cut to 20 openqa_workers: 20 sudoers: "{{ private }}/files/sudo/qavirt-sudoers" diff --git a/inventory/host_vars/openqa-x86-worker05.iad2.fedoraproject.org 
b/inventory/host_vars/openqa-x86-worker05.iad2.fedoraproject.org index 92b31f77f4..adae21e026 100644 --- a/inventory/host_vars/openqa-x86-worker05.iad2.fedoraproject.org +++ b/inventory/host_vars/openqa-x86-worker05.iad2.fedoraproject.org @@ -59,8 +59,10 @@ nrpe_procs_warn: 1400 # Has a hardware RNG openqa_rngd: true openqa_tap_iface: em1 -# this is a powerful machine, can handle more openQA workers -openqa_workers: 30 +# this machine has 24 CPUs, 22 is our "just under one worker per +# CPU" rule-of-thumb, but it seems to be running out of memory so +# cut to 20 +openqa_workers: 20 sudoers: "{{ private }}/files/sudo/qavirt-sudoers" # firewall ports for server->worker websockets connections # this port is 'QEMUPORT plus 1' diff --git a/inventory/inventory b/inventory/inventory index c96e60274d..03c690cd4f 100644 --- a/inventory/inventory +++ b/inventory/inventory @@ -443,9 +443,13 @@ odcs_backend openqa01.iad2.fedoraproject.org [openqa_workers] +# xeon gold 5218 (best) openqa-x86-worker01.iad2.fedoraproject.org +# xeon gold 6130 (second best) openqa-x86-worker02.iad2.fedoraproject.org -openqa-x86-worker03.iad2.fedoraproject.org +# swapped from lab as 03 is kinda poorly +# xeon e5-2683v4 (third best) +openqa-x86-worker06.iad2.fedoraproject.org openqa-a64-worker02.iad2.fedoraproject.org openqa-a64-worker03.iad2.fedoraproject.org @@ -457,9 +461,15 @@ openqa-lab01.iad2.fedoraproject.org openqa-a64-worker01.iad2.fedoraproject.org openqa-p09-worker01.iad2.fedoraproject.org openqa-p09-worker02.iad2.fedoraproject.org +# xeon gold 6130 (second best) openqa-x86-worker04.iad2.fedoraproject.org +# xeon e5-2680 (worst) openqa-x86-worker05.iad2.fedoraproject.org -openqa-x86-worker06.iad2.fedoraproject.org +# prod used to have 01, 02, 03 and lab had 04, 05, 06 which was nice +# and clean, but 03 is weaker hardware and seems to be struggling +# lately, so we swapped 03 and 06 to give prod the best hw +# xeon e5-2680 (worst) +openqa-x86-worker03.iad2.fedoraproject.org # the workers 
that can run networked jobs. each server should have *one* of these per arch # per tap worker class defined in os-autoinst-distri-fedora (currently there are two