diff --git a/roles/copr/backend/files/copr-lighty-logger b/roles/copr/backend/files/copr-lighty-logger new file mode 100755 index 0000000000..2bfe24060b --- /dev/null +++ b/roles/copr/backend/files/copr-lighty-logger @@ -0,0 +1,33 @@ +#! /bin/bash + +# Something like 'cat' fucntionality, but automatically re-open the output file +# upon the SIGHUP signal. We used to use cronolog instead in Copr, but (a) we +# haven't used it for rotating at all, and (b) that tool doesn't react on +# SIGHUP. Long story in: https://pagure.io/copr/copr/issue/2001 +# +# One may object this is slow, I tested this locally with throughput about +# 12k lines per second: +# +# $ for i in `seq 1000000`; do printf "%79d\n" "0" >> /tmp/test-file; done +# $ time cat /tmp/test-file | /tmp/logger /tmp/output-measured +# real 1m24.354s +# user 0m39.895s +# sys 1m6.402s +# +# But we would get much higher throughput if implemented in C. + +logfile=$1 +cmd="$0 $*" + +handler() +{ + exec 1>> "$logfile" + echo "=== start: $cmd ===" +} + +trap handler SIGHUP +handler + +while IFS= read -r line; do + echo "$line" +done diff --git a/roles/copr/backend/files/copr-lighty-reopen-logs b/roles/copr/backend/files/copr-lighty-reopen-logs new file mode 100755 index 0000000000..84c8fa5487 --- /dev/null +++ b/roles/copr/backend/files/copr-lighty-reopen-logs @@ -0,0 +1,15 @@ +#! /bin/bash + +main_pid=$(systemctl show --property MainPID --value lighttpd) +for child in $(cat /proc/$main_pid/task/$main_pid/children); do + cmd_to_kill=$(readlink -f "/proc/$child/exe") + case $cmd_to_kill in + *bash) + echo "sending SIGHUP to PID $child as that's bash" + kill -HUP "$child" + ;; + *) + echo "Not sending to $cmd_to_kill" + ;; + esac +done diff --git a/roles/copr/backend/tasks/main.yml b/roles/copr/backend/tasks/main.yml index 27f96c8314..db5e7acbe5 100644 --- a/roles/copr/backend/tasks/main.yml +++ b/roles/copr/backend/tasks/main.yml @@ -29,7 +29,6 @@ - python3-keystoneclient - python3-dateutil - php-cli - - cronolog - nfs-utils - golang-github-prometheus-node-exporter @@ -146,8 +145,27 @@ - name: install custom lighttpd template for directory listings template: src="lighttpd/dir-generator.php.j2" dest="/var/lib/copr/public_html/dir-generator.php" owner=copr group=copr mode=0755 +- name: install the helper scripts for lighttpd log rotation + copy: + src: "{{ item }}" + dest: /usr/local/bin/{{ item }} + mode: 0755 + owner: root + group: root + with_items: + - copr-lighty-logger + - copr-lighty-reopen-logs + register: logrotate_scripts + tags: logrotate + +- name: fix selinux context on helper scripts + command: restorecon -irv /usr/local/bin/copr-lighty* + when: logrotate_scripts.changed + tags: logrotate + - name: install custom logrotate config for lighttpd template: src="logrotate/lighttpd.j2" dest=/etc/logrotate.d/lighttpd owner=root group=root mode=644 + tags: logrotate - name: start webserver service: state=started enabled=yes name=lighttpd diff --git a/roles/copr/backend/templates/lighttpd/lighttpd.conf b/roles/copr/backend/templates/lighttpd/lighttpd.conf index f37738f15f..33040f9326 100644 --- a/roles/copr/backend/templates/lighttpd/lighttpd.conf +++ b/roles/copr/backend/templates/lighttpd/lighttpd.conf @@ -18,7 +18,7 @@ var.server_root = "/var/www" var.state_dir = "/run" var.home_dir = "/var/lib/lighttpd" var.conf_dir = "/etc/lighttpd" -var.cronolog_pipe = "| /usr/sbin/cronolog " +var.log_pipe = "| /usr/local/bin/copr-lighty-logger " ## ## run the server chrooted. @@ -149,7 +149,7 @@ server.pid-file = state_dir + "/lighttpd.pid" ## ## Path to the error log file ## -server.errorlog = cronolog_pipe + log_root + "/error.log" +server.errorlog = log_pipe + log_root + "/error.log" ## ## If you want to log to syslog you have to unset the @@ -527,13 +527,13 @@ $HTTP["url"] =~ "^/archive/spacewalk/" { } # This may break many things, per: https://redmine.lighttpd.net/projects/1/wiki/Server_max-workerDetails -# At least we have to use cronolog: https://redmine.lighttpd.net/projects/1/wiki/Docs_ModAccesslog +# At least we have to use cronolog or alternative: https://redmine.lighttpd.net/projects/1/wiki/Docs_ModAccesslog # But we need logrotate for logcounter, see /etc/logrotate.d/lighttpd, and # logrotate doesn't work well with logcounter. server.max-worker = 6 $HTTP["url"] !~ "^/archive/spacewalk($|/)" { - accesslog.filename = cronolog_pipe + log_root + "/access.log" + accesslog.filename = log_pipe + log_root + "/access.log" $HTTP["url"] =~ "\.log\.gz$" { magnet.attract-physical-path-to = ( "/etc/lighttpd/content-encoding-gzip-if-exists.lua" ) mimetype.assign = ("" => "text/plain" ) diff --git a/roles/copr/backend/templates/logrotate/lighttpd.j2 b/roles/copr/backend/templates/logrotate/lighttpd.j2 index 94e460e7ec..7560a12273 100644 --- a/roles/copr/backend/templates/logrotate/lighttpd.j2 +++ b/roles/copr/backend/templates/logrotate/lighttpd.j2 @@ -3,20 +3,24 @@ # (likely impossible) situations when access.log is empty and error.log not, we # would call hitcounter too on an empty file. # -# Note that lighttpd server runs with max-workers, and thus we pipe the logs -# through cronolog. So the only reason we actually need logrotate is that -# (a) it implements compression and (b) it allows us to trigger the hitcounter -# script below. +# Note that lighttpd server runs with max-workers, and thus we should pipe the +# logs through cronolog to have consistent logs (see mod_accesslog docs). On +# the other hand, we still use logrotate. That's needed because it supports +# compressison, and because it can trigger the hitcounter script below. # -# Note there are certain problems with SIGHUP and SIGUSR1 signals sent to -# lighttpd main process directly (to re-open log descriptors), at least when -# cronolog and logrotate are both enabled (see the headaches in -# https://pagure.io/copr/copr/issue/2001). -# -# So we send -HUP to all cronolog processes (we have two only on the -# copr-backend instance), which leads to gently termination. This is -# fortunately detected by Lighty and the cronolog processses are automatically -# restarted. +# Running both "log rotation" programs turned out to be very complicated: +# - sending -HUP to lighty doesn't restart the piped cronolog, so it never +# reopens the file after rotation +# - cronolog doesn't handle -HUP correctly, it just terminates +# - when cronolog terminates, lighttpd restarts all the child processes (not +# just cronolog processes), leading to whole bunch of problems (but there's at +# least short downtime) +# There's no reasonable way-around this than logrotate's copytruncate option +# (which is racy by definition). Lighty is innocent here (doesn't ever expect +# the log pipe to terminate, and if it happens it's good reason to restart +# everything). Cronolog is innocent here, it doesn't actaully do any rotation. +# Lograte is innocent as well. +# See https://pagure.io/copr/copr/issue/2001 for more info. /var/log/lighttpd/*log { rotate 5 @@ -30,6 +34,6 @@ /usr/bin/copr_log_hitcounter.py /var/log/lighttpd/access.log --ignore-subnets 172.25.144.0/20 209.132.184.33/24 &>>/var/log/copr-backend/hitcounter-logrotate.log || : endscript postrotate - /usr/bin/killall -HUP cronolog &>/dev/null || : + /usr/local/bin/copr-lighty-reopen-logs || : endscript }