MM: use logrotate for crawler logs, and give them more space

Signed-off-by: Aurélien Bompard <aurelien@bompard.org>
Aurélien Bompard 2024-03-13 17:20:29 +01:00
parent 2ca3bb74e0
commit fa91a361ac
10 changed files with 87 additions and 11 deletions

@@ -39,7 +39,9 @@ spec:
               mountPath: "/etc/mirrormanager-secrets"
               readOnly: true
             - name: data
-              mountPath: /var/lib/mirrormanager
+              mountPath: "/var/lib/mirrormanager"
+            - name: logs
+              mountPath: "/var/log/mirrormanager"
             - name: wsgi-script
               mountPath: "/opt/app-root/src/deploy"
               readOnly: true
@@ -80,6 +82,9 @@ spec:
         - name: data
           persistentVolumeClaim:
             claimName: data
+        - name: logs
+          persistentVolumeClaim:
+            claimName: logs
         - name: wsgi-script
           configMap:
             name: wsgi-script

@@ -12,6 +12,19 @@ spec:
       storage: 1Gi
   storageClassName: ocs-storagecluster-cephfs
 ---
+# Crawler logs, made available to mirror admins
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: logs
+spec:
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 5Gi
+  storageClassName: ocs-storagecluster-cephfs
+---
 # This contains:
 # - the extracted netblocks, around 30Mb
 # - the mirrorlist cache, around 300Mb
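
The new logs claim is a ReadWriteMany CephFS volume, so the crawler jobs, the web pods and the logrotate job can all mount it at /var/log/mirrormanager at the same time. A minimal check that it was provisioned and bound, assuming the objects live in the mirrormanager namespace (as the image reference later in this commit suggests):

# The claim should report STATUS "Bound", ACCESS MODES "RWX" and 5Gi capacity
oc get pvc logs -n mirrormanager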

@@ -8,4 +8,6 @@ USER root
 RUN dnf install -y python3-pyrpmmd
 # Add Rust
 RUN dnf install -y cargo
+# Add Logrotate
+RUN dnf install -y logrotate
 USER 1001

@@ -21,6 +21,8 @@ items:
       {{ load_file('scan-primary-mirror.toml') | indent(6) }}
     scan-primary-mirror-centos.toml: |-
       {{ load_file('scan-primary-mirror-centos.toml') | indent(6) }}
+    logrotate.conf: |-
+      {{ load_file('logrotate.conf') | indent(6) }}
 - apiVersion: v1
   kind: ConfigMap
   metadata:
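
The ConfigMap template now renders logrotate.conf next to the scanner configs. A quick way to confirm the rendered key after a playbook run, assuming the ConfigMap is named config in the mirrormanager namespace (the CronJob below mounts it under /etc/mirrormanager):

# Print the logrotate.conf key from the ConfigMap (the dot in the key name must be escaped)
oc get configmap config -n mirrormanager -o jsonpath='{.data.logrotate\.conf}'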

@@ -45,6 +45,8 @@ spec:
                   readOnly: true
                 - name: data
                   mountPath: "/var/lib/mirrormanager"
+                - name: logs
+                  mountPath: "/var/log/mirrormanager"
           volumes:
             - name: config
               configMap:
@@ -52,8 +54,53 @@ spec:
             - name: data
               persistentVolumeClaim:
                 claimName: data
+            - name: logs
+              persistentVolumeClaim:
+                claimName: logs
 {% endfor %}
+#
+### Rotate logs
+#
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: logrotate-crawler-logs
+spec:
+  successfulJobsHistoryLimit: 3
+  failedJobsHistoryLimit: 3
+  concurrencyPolicy: Forbid
+  schedule: "56 23 * * *"
+  startingDeadlineSeconds: 500
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          restartPolicy: Never
+          containers:
+            - name: mirrormanager
+              image: image-registry.openshift-image-registry.svc:5000/mirrormanager/mirrormanager2:latest
+              command:
+                - "logrotate"
+                - "--state"
+                - "/var/log/mirrormanager/logrotate.state"
+                - "/etc/mirrormanager/logrotate.conf"
+              volumeMounts:
+                - name: config
+                  mountPath: "/etc/mirrormanager"
+                  readOnly: true
+                - name: logs
+                  mountPath: "/var/log/mirrormanager"
+          volumes:
+            - name: config
+              configMap:
+                name: config
+            - name: logs
+              persistentVolumeClaim:
+                claimName: logs
+          securityContext:
+            supplementalGroups: [1001280000]
 #
 ### Download GeoIP database
 #
 ---
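
The logrotate-crawler-logs CronJob runs logrotate every night at 23:56 against the shared logs volume and keeps its state file on that same volume, so rotation history survives pod restarts. To exercise it without waiting for the schedule, a one-off Job can be created from it; this is only a sketch, with an arbitrary job name and the assumed mirrormanager namespace:

# Kick off a single run of the rotation job and follow its output
oc create job logrotate-manual-run --from=cronjob/logrotate-crawler-logs -n mirrormanager
oc logs -f job/logrotate-manual-run -n mirrormanager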

@@ -32,6 +32,8 @@ spec:
                   readOnly: true
                 - name: data
                   mountPath: "/var/lib/mirrormanager"
+                - name: logs
+                  mountPath: "/var/log/mirrormanager"
                 - name: mirror
                   mountPath: "/srv/pub"
                 - name: mirror-archive
@@ -46,6 +48,9 @@ spec:
             - name: data
               persistentVolumeClaim:
                 claimName: data
+            - name: logs
+              persistentVolumeClaim:
+                claimName: logs
             - name: mirror
               persistentVolumeClaim:
                 claimName: primary-mirror

@@ -0,0 +1,9 @@
+/var/log/mirrormanager/crawler/*.log {
+    missingok
+    notifempty
+    weekly
+    dateext
+    rotate 15
+    copytruncate
+    compress
+}
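
This configuration rotates the crawler logs weekly, keeps 15 compressed, date-stamped rotations, and uses copytruncate so the crawler can keep writing to its open file descriptors while the old contents are copied away. The syntax can be validated with a dry run before the CronJob picks it up; the state file path below is only an example:

# Parse the config and print what logrotate would do, without rotating anything
logrotate --debug --state /tmp/logrotate-test.state /etc/mirrormanager/logrotate.conf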

@@ -134,7 +134,7 @@ GEOIP_BASE = "/var/lib/mirrormanager/geoip"
 # which can then be used in the web interface by the mirror admins.
 # Other parts besides the crawler are also using this variable to
 # decide where to store log files.
-MM_LOG_DIR = "/var/lib/mirrormanager/logs"
+MM_LOG_DIR = "/var/log/mirrormanager"
 # This is used to exclude certain protocols to be entered
 # for host category URLs at all.
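
With MM_LOG_DIR pointing at /var/log/mirrormanager, the crawler writes its per-host logs straight onto the shared volume that the web pods now mount as well, which is what makes the rsync-based collection script deleted below unnecessary. A quick check from a running pod, assuming oc access and using deploy/mirrormanager-web purely as a placeholder for the actual Deployment name:

# List the crawler logs on the shared volume from inside a web pod (deployment name is a placeholder)
oc rsh -n mirrormanager deploy/mirrormanager-web ls -l /var/log/mirrormanager/crawler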

@@ -1,7 +0,0 @@
-#!/bin/bash
-CRAWLERS="{% for host in groups['mm_crawler'] %} {{ host }} {% endfor %}"
-for i in ${CRAWLERS}; do
-rsync -aq ${i}::crawler/*log /var/log/mirrormanager/crawler/
-done

@@ -39,9 +39,9 @@ mirrormanager_cron_crawler:
   - category: "Fedora Archive"
     schedule: "0 2 * * *"
     threads: 20
-    timeout: 600
+    timeout: 720
     # This can be a veeery long crawl per host
-    host_timeout: 300
+    host_timeout: 600
     include_disabled: false
   # Other content
   - category: "Fedora Other"