From 362f4b7b63f7d594faaaf1c6095a93ee85869431 Mon Sep 17 00:00:00 2001
From: Pavel Raiskup
Date: Tue, 15 Oct 2024 13:50:18 +0200
Subject: [PATCH] logdetective02: new box

https://lists.fedoraproject.org/archives/list/infrastructure@lists.fedoraproject.org/thread/4ZZQBIJ5XS7HSP44EXMD4OKGXDUPBV34/
---
Review notes (this section is not part of the commit message):
- The provisioning tasks move out of the logdetective01 playbook into the new
  roles/logdetective role, which is added to the playbook's roles list.
- drive_device moves from playbook vars to the logdetective01 host_vars; the
  secondary-drive block is skipped on hosts that do not define it.
- "Set up CUDA binary paths" is moved out of the secondary-drive block and
  gets create: true, so it also runs on hosts without drive_device.
- The CUDA install task gets creates:, so it no longer reports "changed" and
  triggers a reboot on every run.
- The index hash of roles/logdetective/tasks/main.yml predates these two
  fixes. Indentation of context lines was reconstructed; verify it against
  the tree before applying.

 .../logdetective01.fedorainfracloud.org       |  2 +
 .../logdetective02.fedorainfracloud.org       | 22 +++++
 inventory/inventory                           |  1 +
 .../logdetective01.fedorainfracloud.org.yml   | 79 +----------------
 roles/logdetective/tasks/main.yml             | 90 +++++++++++++++++++
 5 files changed, 116 insertions(+), 78 deletions(-)
 create mode 100644 inventory/host_vars/logdetective02.fedorainfracloud.org
 create mode 100644 roles/logdetective/tasks/main.yml

diff --git a/inventory/host_vars/logdetective01.fedorainfracloud.org b/inventory/host_vars/logdetective01.fedorainfracloud.org
index 747593e798..86cb385733 100644
--- a/inventory/host_vars/logdetective01.fedorainfracloud.org
+++ b/inventory/host_vars/logdetective01.fedorainfracloud.org
@@ -11,6 +11,8 @@ csi_relationship: |
 csi_security_category: Low
 description: Machine for developers of Logdetective
 
+drive_device: 38e6c8c2-5747-47bf-b3d2-9f0dea371edc
+
 datacenter: aws
 devel: false
 root_auth_users: msuchy frostyx praiskup nikromen ttomecek jpodivin
diff --git a/inventory/host_vars/logdetective02.fedorainfracloud.org b/inventory/host_vars/logdetective02.fedorainfracloud.org
new file mode 100644
index 0000000000..74c3e376e9
--- /dev/null
+++ b/inventory/host_vars/logdetective02.fedorainfracloud.org
@@ -0,0 +1,22 @@
+---
+ansible_ssh_user: fedora
+ansible_python_interpreter: /usr/bin/python3
+csi_primary_contact: "msuchy (mirek), ttomecek"
+csi_purpose: Development machine for Logdetective 02 - training
+csi_relationship: |
+  - Training of models for Logdetective
+# These variables are pushed into /etc/system_identification by the base role.
+# Groups and individual hosts should override them with specific info.
+# See http://infrastructure.fedoraproject.org/csi/security-policy/
+csi_security_category: Low
+description: Machine for developers of Logdetective
+
+datacenter: aws
+devel: false
+root_auth_users: msuchy frostyx praiskup nikromen ttomecek jpodivin
+
+nrpe_client_uid: 500
+
+tcp_ports: [
+  22, 80, 443,
+]
diff --git a/inventory/inventory b/inventory/inventory
index 2481732df7..50b995b543 100644
--- a/inventory/inventory
+++ b/inventory/inventory
@@ -773,6 +773,7 @@ buildvmhost
 
 [logdetective]
 logdetective01.fedorainfracloud.org
+logdetective02.fedorainfracloud.org
 
 [copr_front_aws]
 copr-fe.aws.fedoraproject.org
diff --git a/playbooks/hosts/logdetective01.fedorainfracloud.org.yml b/playbooks/hosts/logdetective01.fedorainfracloud.org.yml
index 029bc3328f..5f4be2e4a5 100644
--- a/playbooks/hosts/logdetective01.fedorainfracloud.org.yml
+++ b/playbooks/hosts/logdetective01.fedorainfracloud.org.yml
@@ -23,8 +23,6 @@
   become: True
   become_user: root
   gather_facts: True
-  vars:
-    drive_device: 38e6c8c2-5747-47bf-b3d2-9f0dea371edc
   vars_files:
     - /srv/web/infra/ansible/vars/global.yml
     - /srv/private/ansible/vars.yml
@@ -32,82 +30,6 @@
 
   pre_tasks:
     - import_tasks: "{{ tasks_path }}/yumrepos.yml"
-
-  tasks:
-    - import_tasks: "{{ tasks_path }}/cloud_setup_basic.yml"
-    - name: Install basic packages
-      ansible.builtin.dnf:
-        name:
-          - python3-pip
-          - python3-devel
-          - pciutils
-          - git
-          - podman
-          - podman-compose
-          - wget
-          - gcc-c++
-
-    - name: Download and install cuda drivers repo
-      ansible.builtin.shell:
-        cmd: |
-          dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo
-          dnf clean all
-          dnf -y install cuda-toolkit-12-6
-          dnf -y module install nvidia-driver:open-dkms
-      register: cuda_installation
-
-    - name: Restart the system
-      ansible.builtin.reboot:
-      when: cuda_installation.changed
-
-    - name: Ensure state of secondary drive
-      ignore_errors: true
-      block:
-        - name: Ensure mountpoint
-          ansible.builtin.file:
-            path: /mnt/srv
-            state: directory
-
-        - name: Mount the drive on boot
-          ansible.posix.mount:
-            src: "UUID={{ drive_device }}"
-            path: /mnt/srv
-            boot: true
-            state: mounted
-            fstype: ext4
-
-        - name: Create pip cache dir
-          ansible.builtin.file:
-            state: directory
-            path: /mnt/srv/.cache/pip
-            mode: "0777"
-            recurse: true
-
-        - name: Create Hugging Face cache dir
-          ansible.builtin.file:
-            state: directory
-            path: /mnt/srv/.cache/huggingface
-            mode: "0777"
-            recurse: true
-
-        - name: Set cache locations to the secondary drive
-          ansible.builtin.blockinfile:
-            create: true
-            path: /etc/profile.d/externalcaches.sh
-            block: |
-              export HUGGINGFACE_HUB_CACHE=/mnt/srv/.cache/huggingface
-              export PIP_CACHE_DIR=/mnt/srv/.cache/pip
-
-        - name: Set up CUDA binary paths
-          ansible.builtin.lineinfile:
-            path: /etc/profile.d/cudapath.sh
-            line: export PATH=/usr/local/cuda-12.6/bin${PATH:+:${PATH}}
-
-# this should be set to ansible_hostname
-# - name: "set hostname (required by some services, at least postfix need it)"
-#   hostname: name="{{copr_hostbase}}.cloud.fedoraproject.org"
-#   when: env != 'production'
-
 - name: provision instance
   hosts: logdetective
   become: True
@@ -122,4 +44,5 @@
   # Roles are run first, before tasks, regardless of where you place them here.
   roles:
     - base
+    - logdetective
     - nagios_client
diff --git a/roles/logdetective/tasks/main.yml b/roles/logdetective/tasks/main.yml
new file mode 100644
index 0000000000..33c08202fe
--- /dev/null
+++ b/roles/logdetective/tasks/main.yml
@@ -0,0 +1,90 @@
+---
+# Provision a Logdetective box: base packages, the NVIDIA CUDA toolkit and
+# driver, and (optionally) pip / Hugging Face caches on a secondary drive.
+#
+# Variables:
+#   tasks_path   - directory containing cloud_setup_basic.yml
+#   drive_device - optional; filesystem UUID of the ext4 drive for /mnt/srv
+- import_tasks: "{{ tasks_path }}/cloud_setup_basic.yml"
+
+- name: Install basic packages
+  ansible.builtin.dnf:
+    name:
+      - python3-pip
+      - python3-devel
+      - pciutils
+      - git
+      - podman
+      - podman-compose
+      - wget
+      - gcc-c++
+
+# "creates" skips the install once the toolkit directory exists; a bare shell
+# task always reports "changed", which would reboot the host on every run.
+- name: Download and install cuda drivers repo
+  ansible.builtin.shell:
+    cmd: |
+      dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo
+      dnf clean all
+      dnf -y install cuda-toolkit-12-6
+      dnf -y module install nvidia-driver:open-dkms
+    creates: /usr/local/cuda-12.6
+  register: cuda_installation
+
+- name: Restart the system
+  ansible.builtin.reboot:
+  when: cuda_installation.changed
+
+# Best-effort (errors ignored); skipped on hosts that define no drive_device.
+- name: Ensure state of secondary drive
+  ignore_errors: true
+  when:
+    - drive_device is defined
+  block:
+    - name: Ensure mountpoint
+      ansible.builtin.file:
+        path: /mnt/srv
+        state: directory
+
+    - name: Mount the drive on boot
+      ansible.posix.mount:
+        src: "UUID={{ drive_device }}"
+        path: /mnt/srv
+        boot: true
+        state: mounted
+        fstype: ext4
+
+    - name: Create pip cache dir
+      ansible.builtin.file:
+        state: directory
+        path: /mnt/srv/.cache/pip
+        mode: "0777"
+        recurse: true
+
+    - name: Create Hugging Face cache dir
+      ansible.builtin.file:
+        state: directory
+        path: /mnt/srv/.cache/huggingface
+        mode: "0777"
+        recurse: true
+
+    - name: Set cache locations to the secondary drive
+      ansible.builtin.blockinfile:
+        create: true
+        path: /etc/profile.d/externalcaches.sh
+        block: |
+          export HUGGINGFACE_HUB_CACHE=/mnt/srv/.cache/huggingface
+          export PIP_CACHE_DIR=/mnt/srv/.cache/pip
+
+# Does not touch /mnt/srv, so it runs whether or not drive_device is set;
+# "create" is needed because lineinfile fails when the file is missing.
+- name: Set up CUDA binary paths
+  ansible.builtin.lineinfile:
+    create: true
+    path: /etc/profile.d/cudapath.sh
+    line: export PATH=/usr/local/cuda-12.6/bin${PATH:+:${PATH}}
+
+# this should be set to ansible_hostname
+# - name: "set hostname (required by some services, at least postfix need it)"
+#   hostname: name="{{copr_hostbase}}.cloud.fedoraproject.org"
+#   when: env != 'production'