Pavel Raiskup 2024-10-15 13:50:18 +02:00
parent e325a03d40
commit 362f4b7b63
5 changed files with 103 additions and 78 deletions

View file

@ -11,6 +11,8 @@ csi_relationship: |
csi_security_category: Low
description: Machine for developers of Logdetective
drive_device: 38e6c8c2-5747-47bf-b3d2-9f0dea371edc
datacenter: aws
devel: false
root_auth_users: msuchy frostyx praiskup nikromen ttomecek jpodivin

View file

@ -0,0 +1,22 @@
---
# Variables for the Logdetective 02 machine (model training).

# Connection settings.
ansible_ssh_user: fedora
ansible_python_interpreter: /usr/bin/python3

# These variables are pushed into /etc/system_identification by the base role.
# Groups and individual hosts should override them with specific info.
# See http://infrastructure.fedoraproject.org/csi/security-policy/
csi_primary_contact: "msuchy (mirek), ttomecek"
csi_purpose: Development machine for Logdetective 02 - training
csi_relationship: |
  - Training of models for Logdetective
csi_security_category: Low

# General host description.
description: Machine for developers of Logdetective
datacenter: aws
devel: false

# Space-separated account names.
# NOTE(review): presumably the users granted root access; the consumer is not
# visible here.
root_auth_users: msuchy frostyx praiskup nikromen ttomecek jpodivin

nrpe_client_uid: 500

# TCP ports listed for this host (22, 80 and 443).
tcp_ports:
  - 22
  - 80
  - 443

View file

@ -773,6 +773,7 @@ buildvmhost
[logdetective]
logdetective01.fedorainfracloud.org
logdetective02.fedorainfracloud.org
[copr_front_aws]
copr-fe.aws.fedoraproject.org

View file

@ -23,8 +23,6 @@
become: True
become_user: root
gather_facts: True
vars:
drive_device: 38e6c8c2-5747-47bf-b3d2-9f0dea371edc
vars_files:
- /srv/web/infra/ansible/vars/global.yml
- /srv/private/ansible/vars.yml
@ -32,82 +30,6 @@
pre_tasks:
- import_tasks: "{{ tasks_path }}/yumrepos.yml"
tasks:
- import_tasks: "{{ tasks_path }}/cloud_setup_basic.yml"
- name: Install basic packages
ansible.builtin.dnf:
name:
- python3-pip
- python3-devel
- pciutils
- git
- podman
- podman-compose
- wget
- gcc-c++
- name: Download and install cuda drivers repo
ansible.builtin.shell:
cmd: |
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo
dnf clean all
dnf -y install cuda-toolkit-12-6
dnf -y module install nvidia-driver:open-dkms
register: cuda_installation
- name: Restart the system
ansible.builtin.reboot:
when: cuda_installation.changed
- name: Ensure state of secondary drive
ignore_errors: true
block:
- name: Ensure mountpoint
ansible.builtin.file:
path: /mnt/srv
state: directory
- name: Mount the drive on boot
ansible.posix.mount:
src: "UUID={{ drive_device }}"
path: /mnt/srv
boot: true
state: mounted
fstype: ext4
- name: Create pip cache dir
ansible.builtin.file:
state: directory
path: /mnt/srv/.cache/pip
mode: "0777"
recurse: true
- name: Create Hugging Face cache dir
ansible.builtin.file:
state: directory
path: /mnt/srv/.cache/huggingface
mode: "0777"
recurse: true
- name: Set cache locations to the secondary drive
ansible.builtin.blockinfile:
create: true
path: /etc/profile.d/externalcaches.sh
block: |
export HUGGINGFACE_HUB_CACHE=/mnt/srv/.cache/huggingface
export PIP_CACHE_DIR=/mnt/srv/.cache/pip
- name: Set up CUDA binary paths
ansible.builtin.lineinfile:
path: /etc/profile.d/cudapath.sh
line: export PATH=/usr/local/cuda-12.6/bin${PATH:+:${PATH}}
# this should be set to ansible_hostname
# - name: "set hostname (required by some services, at least postfix need it)"
# hostname: name="{{copr_hostbase}}.cloud.fedoraproject.org"
# when: env != 'production'
- name: provision instance
hosts: logdetective
become: True
@ -122,4 +44,5 @@
# Roles are run first, before tasks, regardless of where you place them here.
roles:
- base
- logdetective
- nagios_client

View file

@ -0,0 +1,77 @@
---
# Statically imports the shared task list located under tasks_path.
# Named and written with the fully qualified module name so it matches the
# other tasks in this file.
- name: Import cloud_setup_basic tasks
  ansible.builtin.import_tasks: "{{ tasks_path }}/cloud_setup_basic.yml"
# Installs the base tooling through dnf: Python packaging/development bits,
# pciutils, git, podman with podman-compose, wget and the C++ compiler.
- name: Install basic packages
  ansible.builtin.dnf:
    state: present  # explicit spelling of the module's default action
    name:
      - python3-pip
      - python3-devel
      - pciutils
      - git
      - podman
      - podman-compose
      - wget
      - gcc-c++
# Adds NVIDIA's CUDA repository for Fedora 39, then installs the CUDA 12.6
# toolkit and the nvidia-driver:open-dkms module stream.
#
# The commands still run on every play. A shell task reports "changed" by
# default, which would make the reboot task keyed on cuda_installation fire
# every time, so the result is only marked changed when dnf did some work.
- name: Download and install cuda drivers repo
  ansible.builtin.shell:
    cmd: |
      dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/fedora39/x86_64/cuda-fedora39.repo
      dnf clean all
      dnf -y install cuda-toolkit-12-6
      dnf -y module install nvidia-driver:open-dkms
  register: cuda_installation
  # dnf prints "Nothing to do." for an empty transaction. When both install
  # commands print it, nothing was installed and no reboot is needed.
  # If the wording does not match (e.g. a different locale), the task falls
  # back to reporting changed, which is the previous behaviour.
  changed_when: cuda_installation.stdout.count('Nothing to do.') < 2
# Reboots the machine, but only when the registered CUDA installation result
# reports a change.
- name: Restart the system
  when: cuda_installation is changed
  ansible.builtin.reboot:
# Prepares the secondary drive: creates /mnt/srv and mounts the filesystem
# identified by drive_device there.
# Runs only on hosts that define drive_device; errors are ignored
# (ignore_errors), so a failure here does not abort the play.
# NOTE(review): indentation is not visible in this view, so confirm which of the
# tasks that follow "block:" are actually nested inside it.
- name: Ensure state of secondary drive
ignore_errors: true
when:
- drive_device is defined
block:
# Directory used as the mount target below.
- name: Ensure mountpoint
ansible.builtin.file:
path: /mnt/srv
state: directory
# drive_device is interpolated as a filesystem UUID; the filesystem is declared
# as ext4 and mounted at /mnt/srv.
- name: Mount the drive on boot
ansible.posix.mount:
src: "UUID={{ drive_device }}"
path: /mnt/srv
boot: true
state: mounted
fstype: ext4
# Creates the pip cache directory under /mnt/srv.
# Mode 0777 is applied recursively, so the directory and its existing contents
# become world-writable.
- name: Create pip cache dir
  ansible.builtin.file:
    path: /mnt/srv/.cache/pip
    state: directory
    recurse: true
    mode: "0777"
# Creates the Hugging Face cache directory under /mnt/srv.
# Mode 0777 is applied recursively, so the directory and its existing contents
# become world-writable.
- name: Create Hugging Face cache dir
  ansible.builtin.file:
    path: /mnt/srv/.cache/huggingface
    state: directory
    recurse: true
    mode: "0777"
# Maintains a managed block in /etc/profile.d/externalcaches.sh that exports
# the Hugging Face and pip cache locations under /mnt/srv/.cache.
# The file is created when missing; the mode is set explicitly so the result
# does not depend on the umask in effect (profile.d snippets only need to be
# readable).
- name: Set cache locations to the secondary drive
  ansible.builtin.blockinfile:
    path: /etc/profile.d/externalcaches.sh
    create: true
    mode: "0644"
    block: |
      export HUGGINGFACE_HUB_CACHE=/mnt/srv/.cache/huggingface
      export PIP_CACHE_DIR=/mnt/srv/.cache/pip
# Ensures /etc/profile.d/cudapath.sh contains a line that prepends the
# CUDA 12.6 bin directory to PATH.
# lineinfile fails when the target file does not exist unless create is set,
# so the file is created on first run, with an explicit mode.
- name: Set up CUDA binary paths
  ansible.builtin.lineinfile:
    path: /etc/profile.d/cudapath.sh
    create: true
    mode: "0644"
    # Single-quoted so YAML keeps the shell parameter expansion verbatim.
    line: 'export PATH=/usr/local/cuda-12.6/bin${PATH:+:${PATH}}'
# this should be set to ansible_hostname
# - name: "set hostname (required by some services, at least postfix need it)"
# hostname: name="{{copr_hostbase}}.cloud.fedoraproject.org"
# when: env != 'production'