MM: use logrotate for crawler logs, and give them more space

Signed-off-by: Aurélien Bompard <aurelien@bompard.org>
Aurélien Bompard 2024-03-13 17:20:29 +01:00
parent 2ca3bb74e0
commit fa91a361ac
10 changed files with 87 additions and 11 deletions

@@ -39,7 +39,9 @@ spec:
               mountPath: "/etc/mirrormanager-secrets"
               readOnly: true
             - name: data
-              mountPath: /var/lib/mirrormanager
+              mountPath: "/var/lib/mirrormanager"
+            - name: logs
+              mountPath: "/var/log/mirrormanager"
             - name: wsgi-script
               mountPath: "/opt/app-root/src/deploy"
               readOnly: true
@@ -80,6 +82,9 @@ spec:
         - name: data
           persistentVolumeClaim:
             claimName: data
+        - name: logs
+          persistentVolumeClaim:
+            claimName: logs
         - name: wsgi-script
           configMap:
             name: wsgi-script

@@ -12,6 +12,19 @@ spec:
       storage: 1Gi
   storageClassName: ocs-storagecluster-cephfs
 ---
+# Crawler logs, made available to mirror admins
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: logs
+spec:
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 5Gi
+  storageClassName: ocs-storagecluster-cephfs
+---
 # This contains:
 # - the extracted netblocks, around 30Mb
 # - the mirrorlist cache, around 300Mb
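
The new logs claim is a ReadWriteMany CephFS volume, so the crawler jobs, the web pods and the logrotate job can all mount it at /var/log/mirrormanager at the same time. A minimal check that it was provisioned and bound, assuming the objects live in the mirrormanager namespace (as the image reference later in this commit suggests):

# The claim should report STATUS "Bound", ACCESS MODES "RWX" and 5Gi capacity
oc get pvc logs -n mirrormanager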

@@ -8,4 +8,6 @@ USER root
 RUN dnf install -y python3-pyrpmmd
 # Add Rust
 RUN dnf install -y cargo
+# Add Logrotate
+RUN dnf install -y logrotate
 USER 1001

@@ -21,6 +21,8 @@ items:
       {{ load_file('scan-primary-mirror.toml') | indent(6) }}
     scan-primary-mirror-centos.toml: |-
       {{ load_file('scan-primary-mirror-centos.toml') | indent(6) }}
+    logrotate.conf: |-
+      {{ load_file('logrotate.conf') | indent(6) }}
 - apiVersion: v1
   kind: ConfigMap
   metadata:
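
The ConfigMap template now renders logrotate.conf next to the scanner configs. A quick way to confirm the rendered key after a playbook run, assuming the ConfigMap is named config in the mirrormanager namespace (the CronJob below mounts it under /etc/mirrormanager):

# Print the logrotate.conf key from the ConfigMap (the dot in the key name must be escaped)
oc get configmap config -n mirrormanager -o jsonpath='{.data.logrotate\.conf}'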

@@ -45,6 +45,8 @@ spec:
                   readOnly: true
                 - name: data
                   mountPath: "/var/lib/mirrormanager"
+                - name: logs
+                  mountPath: "/var/log/mirrormanager"
           volumes:
             - name: config
               configMap:
@@ -52,8 +54,53 @@ spec:
             - name: data
               persistentVolumeClaim:
                 claimName: data
+            - name: logs
+              persistentVolumeClaim:
+                claimName: logs
 {% endfor %}
+#
+### Rotate logs
+#
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: logrotate-crawler-logs
+spec:
+  successfulJobsHistoryLimit: 3
+  failedJobsHistoryLimit: 3
+  concurrencyPolicy: Forbid
+  schedule: "56 23 * * *"
+  startingDeadlineSeconds: 500
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          restartPolicy: Never
+          containers:
+            - name: mirrormanager
+              image: image-registry.openshift-image-registry.svc:5000/mirrormanager/mirrormanager2:latest
+              command:
+                - "logrotate"
+                - "--state"
+                - "/var/log/mirrormanager/logrotate.state"
+                - "/etc/mirrormanager/logrotate.conf"
+              volumeMounts:
+                - name: config
+                  mountPath: "/etc/mirrormanager"
+                  readOnly: true
+                - name: logs
+                  mountPath: "/var/log/mirrormanager"
+          volumes:
+            - name: config
+              configMap:
+                name: config
+            - name: logs
+              persistentVolumeClaim:
+                claimName: logs
+          securityContext:
+            supplementalGroups: [1001280000]
 #
 ### Download GeoIP database
 #
 ---
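
The logrotate-crawler-logs CronJob runs logrotate every night at 23:56 against the shared logs volume and keeps its state file on that same volume, so rotation history survives pod restarts. To exercise it without waiting for the schedule, a one-off Job can be created from it; this is only a sketch, with an arbitrary job name and the assumed mirrormanager namespace:

# Kick off a single run of the rotation job and follow its output
oc create job logrotate-manual-run --from=cronjob/logrotate-crawler-logs -n mirrormanager
oc logs -f job/logrotate-manual-run -n mirrormanager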

@@ -32,6 +32,8 @@ spec:
                   readOnly: true
                 - name: data
                   mountPath: "/var/lib/mirrormanager"
+                - name: logs
+                  mountPath: "/var/log/mirrormanager"
                 - name: mirror
                   mountPath: "/srv/pub"
                 - name: mirror-archive
@@ -46,6 +48,9 @@ spec:
             - name: data
               persistentVolumeClaim:
                 claimName: data
+            - name: logs
+              persistentVolumeClaim:
+                claimName: logs
             - name: mirror
               persistentVolumeClaim:
                 claimName: primary-mirror

@@ -0,0 +1,9 @@
+/var/log/mirrormanager/crawler/*.log {
+    missingok
+    notifempty
+    weekly
+    dateext
+    rotate 15
+    copytruncate
+    compress
+}
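
This configuration rotates the crawler logs weekly, keeps 15 compressed, date-stamped rotations, and uses copytruncate so the crawler can keep writing to its open file descriptors while the old contents are copied away. The syntax can be validated with a dry run before the CronJob picks it up; the state file path below is only an example:

# Parse the config and print what logrotate would do, without rotating anything
logrotate --debug --state /tmp/logrotate-test.state /etc/mirrormanager/logrotate.conf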

@@ -134,7 +134,7 @@ GEOIP_BASE = "/var/lib/mirrormanager/geoip"
 # which can then be used in the web interface by the mirror admins.
 # Other parts besides the crawler are also using this variable to
 # decide where to store log files.
-MM_LOG_DIR = "/var/lib/mirrormanager/logs"
+MM_LOG_DIR = "/var/log/mirrormanager"
 # This is used to exclude certain protocols to be entered
 # for host category URLs at all.
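
With MM_LOG_DIR pointing at /var/log/mirrormanager, the crawler writes its per-host logs straight onto the shared volume that the web pods now mount as well, which is what makes the rsync-based collection script deleted below unnecessary. A quick check from a running pod, assuming oc access and using deploy/mirrormanager-web purely as a placeholder for the actual Deployment name:

# List the crawler logs on the shared volume from inside a web pod (deployment name is a placeholder)
oc rsh -n mirrormanager deploy/mirrormanager-web ls -l /var/log/mirrormanager/crawler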

@@ -1,7 +0,0 @@
-#!/bin/bash
-CRAWLERS="{% for host in groups['mm_crawler'] %} {{ host }} {% endfor %}"
-for i in ${CRAWLERS}; do
-rsync -aq ${i}::crawler/*log /var/log/mirrormanager/crawler/
-done

@@ -39,9 +39,9 @@ mirrormanager_cron_crawler:
   - category: "Fedora Archive"
     schedule: "0 2 * * *"
     threads: 20
-    timeout: 600
+    timeout: 720
     # This can be a veeery long crawl per host
-    host_timeout: 300
+    host_timeout: 600
     include_disabled: false
   # Other content
   - category: "Fedora Other"