openQA: allow all workers NFS write access, other tweaks

The main goal of these changes is to allow all workers in each
deployment NFS write access to the factory share. This is because
I want to try using os-autoinst's at-job-run-time decompression
of disk images instead of openQA's at-asset-download-time
decompression; it avoids some awkwardness with the asset file
name, and should ultimately allow us to drop the decompression
code from openQA entirely.

I also rejigged various other things at the same time, as they
logically go together: mostly cleanups and tweaks to group
variables. I tried to handle more things explicitly with
variables, which makes these plays easier to use outside of
Fedora infra.

Signed-off-by: Adam Williamson <awilliam@redhat.com>
Adam Williamson 2020-11-05 16:06:45 -08:00
parent 35c65637ce
commit 95f062c07a
14 changed files with 85 additions and 20 deletions

View file

@@ -26,5 +26,8 @@ openqa_resultsdb_url: http://resultsdb01.iad2.fedoraproject.org/resultsdb_api/ap
openqa_compose_arches: x86_64,aarch64
openqa_update_arches: ['x86_64']
# all our workers need NFS access
openqa_nfs_workers: "{{ groups['openqa_workers'] }}"
deployment_type: prod
freezes: false
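
For reference, groups['openqa_workers'] is Ansible's built-in lookup
of inventory group membership, so after templating this variable is
just a list of worker hostnames. A sketch of the expansion (an
illustrative subset, not the full inventory):

    openqa_nfs_workers:
      - openqa-x86-worker04.iad2.fedoraproject.org
      - openqa-a64-worker01.iad2.fedoraproject.org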

View file

@@ -0,0 +1,9 @@
# here we keep variables that are used on all openQA boxes, servers
# and workers, prod and lab.
# this is the UID and GID of geekotest on the server (the user openQA
# runs as). The asset dirs are owned by this user and group. We also
# create the geekotest group on the workers with this GID and add
# _openqa-worker to it, so os-autoinst can write to the asset dirs,
# which it needs to do when uncompressing compressed disk assets.
openqa_static_uid: 601
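
The end state this aims for, sketched as illustrative /etc/group
entries (the exact membership is an assumption drawn from the comment
above, not copied from a real host):

    # on the server: geekotest is the user openQA itself runs as
    geekotest:x:601:
    # on each worker: _openqa-worker joins the group so os-autoinst
    # can write to the NFS-mounted asset dirs
    geekotest:x:601:_openqa-worker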

View file

@@ -0,0 +1 @@
openqa_hdds_worker: true

View file

@@ -46,6 +46,9 @@ openqa_resultsdb_url: http://resultsdb01.stg.iad2.fedoraproject.org/resultsdb_ap
openqa_compose_arches: x86_64,aarch64,ppc64le
openqa_update_arches: ['x86_64', 'ppc64le']
# all our workers need NFS access
openqa_nfs_workers: "{{ groups['openqa_lab_workers'] }}"
# FIXME: disable consumers that write to wiki until auth key
# is working again:
# https://pagure.io/fedora-infrastructure/issue/8381

View file

@@ -18,5 +18,8 @@ openqa_repo: updates-testing
# 2020-11 git bump, dep
openqa_scratch: ["54880919", "54872388", "54879672", "54878601"]
# we are all NFS workers for now at least
openqa_nfs_worker: true
deployment_type: stg
freezes: false

View file

@@ -1,10 +1,8 @@
# here we keep variables that are the same between prod and staging
# here we keep variables that are the same between prod and lab
# openQA servers. these are mostly things that are set as variables
# in the plays so we can change them over time and also so the plays
# can be used for non-infra deployments.
openqa_static_uid: 601
openqa_hostname: localhost
openqa_email: adamwill@fedoraproject.org
openqa_nickname: adamwill

View file

@@ -11,7 +11,8 @@ openqa_secret: "{{ prod_openqa_apisecret }}"
openqa_env_suffix:
openqa_env: production
# These boxes are F30+, so we need Python 3 ansible
# we are all NFS workers for now at least
openqa_nfs_worker: true
deployment_type: prod
freezes: false

View file

@@ -442,11 +442,6 @@ odcs_backend_stg
odcs_frontend
odcs_backend
# common group for variables shared between prod and stg servers
[openqa_common]
openqa01.iad2.fedoraproject.org
openqa-lab01.iad2.fedoraproject.org
[openqa]
openqa01.iad2.fedoraproject.org
@@ -477,7 +472,7 @@ openqa-x86-worker04.iad2.fedoraproject.org
openqa-a64-worker01.iad2.fedoraproject.org
openqa-p09-worker01.iad2.fedoraproject.org
# the workers that need rw access to the factory share to run createhdds. again,
# the workers that run createhdds to create the base disk images. Again,
# only one per arch per instance should be present.
[openqa_hdds_workers]
# prod
@@ -488,6 +483,18 @@ openqa-x86-worker04.iad2.fedoraproject.org
openqa-a64-worker01.iad2.fedoraproject.org
openqa-p09-worker01.iad2.fedoraproject.org
# common group for variables shared between all openQA boxes
[openqa_all_common:children]
openqa
openqa_lab
openqa_workers
openqa_lab_workers
# common group for variables shared between prod and stg servers
[openqa_servers_common:children]
openqa
openqa_lab
[packages]
[packages_stg]
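
With the :children suffix these are groups of groups, so variables in
group_vars/openqa_all_common apply to every openQA server and worker
across both deployments. Membership can be sanity-checked with a
pattern query (inventory path illustrative):

    ansible openqa_all_common -i inventory/ --list-hosts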

View file

@@ -1,5 +1,5 @@
- name: configure openQA workers
hosts: openqa_workers:openqa_hdds_workers:openqa_lab_workers
hosts: openqa_workers:openqa_lab_workers
user: root
gather_facts: True

View file

@@ -80,6 +80,12 @@
## be able to run scratch builds to see if a new release is
## really bad before submitting it as an update, we don't want
## to send new builds to updates-testing if they have problems
# - openqa_nfs_workers
## list - A list of hostnames of workers that will share the factory
## directory via NFS. All these will be granted rw access to
## the share (they need write access to be able to decompress
## compressed disk images on the fly). If not defined, the
## factory dir will not be shared via NFS at all.
# If openqa_dbhost is set, the other openqa_db* variables must be too,
# and the server will be configured to use a pgsql database accordingly.
@@ -190,7 +196,7 @@
file: path=/root/openqa_fedora_tools state=absent
- name: Create asset directories
file: path={{ item }} state=directory owner=geekotest group=root mode=0755
file: path={{ item }} state=directory owner=geekotest group=geekotest mode=0755
with_items:
- /var/lib/openqa/share/factory/iso
- /var/lib/openqa/share/factory/iso/fixed
@@ -214,11 +220,13 @@
- name: Create exports file
template: src=exports.j2 dest=/etc/exports.d/openqa.exports owner=root group=root mode=0644
register: exportsfile
when: openqa_nfs_workers is defined
tags:
- config
- name: Enable and start NFS server
service: name=nfs-server enabled=yes state=started
when: openqa_nfs_workers is defined
- name: Refresh exports
command: exportfs -r

View file

@@ -1 +1 @@
/var/lib/openqa/share *(ro,insecure,all_squash) {% for host in groups['openqa_hdds_workers'] %} {{ host }}(rw,insecure,no_root_squash) {% endfor %}
/var/lib/openqa/share {% for host in openqa_nfs_workers %} {{ host }}(rw,insecure,no_root_squash) {% endfor %}
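
Assuming two hypothetical hosts in openqa_nfs_workers, the new
template renders to a single exports line like:

    /var/lib/openqa/share  worker01.example.org(rw,insecure,no_root_squash)  worker02.example.org(rw,insecure,no_root_squash)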

View file

@@ -1,3 +1,6 @@
openqa_hostname: localhost
openqa_repo: updates
openqa_createhdds_branch: master
openqa_nfs_worker: false
openqa_tap: false
openqa_hdds_worker: false

View file

@@ -10,11 +10,24 @@
## string - Repo to enable when updating openQA packages. Set to
## 'updates-testing' to use packages from updates-testing
## default - 'updates', which is effectively a no-op
# Optional vars
# - openqa_nfs_worker
## bool - whether this worker expects to share the factory dir with
## the server via NFS. If set, the nfs-client task will be
## run to set up the share. The worker should also be included
## in the server's openqa_nfs_workers var so it is granted
## access to the share
## default - false
# - openqa_hdds_worker
## bool - whether this worker creates base disk images for a shared
## factory dir. There should be only *one* of these per arch
## per deployment
## default - false
# - openqa_tap
## bool - whether this is the tap- and swtpm-enabled host or not
## each deployment should have *one* tap-capable worker host
## default - false
# Optional vars
# - openqa_rngd
## string - if set to any value, rng-tools package will be
## installed and rngd.service enabled/started
@@ -28,6 +41,14 @@
## be able to run scratch builds to see if a new release is
## really bad before submitting it as an update, we don't want
## to send new builds to updates-testing if they have problems
# - openqa_static_uid
## int - a static GID for the geekotest group, if desired. This is
## useful for NFS mounting openQA data files. The _openqa-worker
## user, which os-autoinst runs as, will be added to this group.
## The idea is that the same group with the same GID exists on the
## NFS server and owns the shared asset directories, so os-autoinst
## can write to them, which it needs to do when uncompressing
## compressed disk assets
- name: Remove scratch repo directory
file: path=/var/tmp/scratchrepo state=absent
@@ -82,6 +103,14 @@
tags:
- packages
- name: Create geekotest group with static GID
group: "name=geekotest gid={{ openqa_static_uid }} system=yes"
when: "openqa_static_uid is defined"
- name: Add _openqa-worker user to geekotest group
user: name=_openqa-worker groups=geekotest append=yes
when: "openqa_static_uid is defined"
- name: Install UEFI firmware package (x86_64 only)
package: name=edk2-ovmf state=present
tags:
@@ -129,10 +158,10 @@
when: "openqa_rngd is defined and openqa_rngd"
- include_tasks: nfs-client.yml
when: openqa_hostname is defined and openqa_hostname != "localhost"
when: openqa_nfs_worker|bool
- include_tasks: tap-setup.yml
when: openqa_tap is defined and openqa_tap
when: openqa_tap|bool
# this is kinda lazy - we could have a separate openqa_swtpm var so
# we could potentially separate tap worker hosts from swtpm ones - but
@@ -141,7 +170,7 @@
# for now it's fine to just assume the tap host(s) is/are also the
# swtpm host(s)
- include_tasks: swtpm-setup.yml
when: openqa_tap is defined and openqa_tap
when: openqa_tap|bool
- name: openQA client config
template: src=client.conf.j2 dest=/etc/openqa/client.conf owner=_openqa-worker group=root mode=0600
@@ -156,7 +185,7 @@
- config
- include_tasks: createhdds.yml
when: "inventory_hostname in groups['openqa_hdds_workers']"
when: openqa_hdds_worker|bool
- name: Enable and start worker services
service: name=openqa-worker@{{ item }} enabled=yes state=started
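
Putting the documented variables together, group_vars for a typical
NFS-attached worker might look like this minimal sketch (values are
illustrative, not copied from the real inventory):

    openqa_hostname: openqa01.iad2.fedoraproject.org
    openqa_nfs_worker: true    # mount the server's factory share via NFS
    openqa_hdds_worker: false  # only one createhdds runner per arch per deployment
    openqa_tap: false          # one tap/swtpm-capable worker host per deployment
    openqa_static_uid: 601     # must match the geekotest GID on the server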

View file

@@ -2,7 +2,7 @@
HOST = http://{{ openqa_hostname|default('localhost') }}
{% if openqa_worker_class is defined %}
WORKER_CLASS = {{ openqa_worker_class }}
{% elif openqa_tap is defined and openqa_tap %}
{% elif openqa_tap|bool %}
{% if ansible_architecture == 'ppc64' or ansible_architecture == 'ppc64le' %}
WORKER_CLASS = tap,tpm,qemu_ppc64le,qemu_ppc64
{% elif ansible_architecture == 'aarch64' %}