copr-be: use cronolog also for error.log

I hadn't realized this can actually be done (mod_accesslog supports error.log, too). And this finally _should be_ the working solution for now, at least until we rework the hitcounter entirely to also support the AWS CloudFront logs: https://pagure.io/copr/copr/issue/1263 . This will allow us to never reload the Lighty server processes for log rotation purposes, which turned out to be very problematic for no obvious reason. Simply put, when the Lighty server is under a certain "production" load (not reproducible via /bin/ab), Lighty fails to reload (on both SIGHUP and SIGUSR1 signals). Something simply hangs the processes. If I had to guess, writes to the pipe to the cronolog process are blocked, causing some weird deadlock? Since we still have to SIGHUP the cronolog process, Lighty fails to both (a) handle SIGHUP/SIGUSR1 and (b) detect that cronolog exited at the same time? But I'm tired of debugging this now.
2021-12-08 10:16:55 +01:00 · 2021-12-08 10:16:55 +01:00 · 83673506b6
commit 83673506b6
parent 9f5ae51805
2 changed files with 17 additions and 8 deletions
--- a/roles/copr/backend/templates/lighttpd/lighttpd.conf
+++ b/roles/copr/backend/templates/lighttpd/lighttpd.conf
@ -18,6 +18,7 @@ var.server_root = "/var/www"
 var.state_dir   = "/run"
 var.home_dir    = "/var/lib/lighttpd"
 var.conf_dir    = "/etc/lighttpd"
+var.cronolog_pipe = "| /usr/sbin/cronolog "

 ##
 ## run the server chrooted.
@ -148,7 +149,7 @@ server.pid-file = state_dir + "/lighttpd.pid"
 ##
 ## Path to the error log file
 ##
-server.errorlog             = log_root + "/error.log"
+server.errorlog = cronolog_pipe + log_root + "/error.log"

 ##
 ## If you want to log to syslog you have to unset the
@ -532,7 +533,7 @@ $HTTP["url"] =~ "^/archive/spacewalk/" {
 server.max-worker = 6

 $HTTP["url"] !~ "^/archive/spacewalk($|/)" {
-  accesslog.filename = "|/usr/sbin/cronolog /var/log/lighttpd/access.log"
+  accesslog.filename = cronolog_pipe + log_root + "/access.log"
  $HTTP["url"] =~ "\.log\.gz$" {
    magnet.attract-physical-path-to = ( "/etc/lighttpd/content-encoding-gzip-if-exists.lua" )
    mimetype.assign = ("" => "text/plain" )
--- a/roles/copr/backend/templates/logrotate/lighttpd.j2
+++ b/roles/copr/backend/templates/logrotate/lighttpd.j2
@ -3,11 +3,20 @@
 # (likely impossible) situations when access.log is empty and error.log not, we
 # would call hitcounter too on an empty file.
 #
-# Note that lighttpd server runs with max-workers, and thus we pipe the access
-# logs through cronolog to the access.log.  So we send -HUP to Lighty (leads to
-# file-descriptor refresh on error.log, but doesn't restart cronolog process),
-# and we also send -HUP to cronolog process (which is fortunately re-started by
-# Lighty/mod_accesslog).
+# Note that lighttpd server runs with max-workers, and thus we pipe the logs
+# through cronolog.  So the only reason we actually need logrotate is that
+# (a) it implements compression and (b) it allows us to trigger the hitcounter
+# script below.
+#
+# Note there are certain problems with SIGHUP and SIGUSR1 signals sent to
+# lighttpd main process directly (to re-open log descriptors), at least when
+# cronolog and logrotate are both enabled (see the headaches in
+# https://pagure.io/copr/copr/issue/2001).
+#
+# So we send -HUP to all cronolog processes (we have only two on the
+# copr-backend instance), which leads to gentle termination.  This is
+# fortunately detected by Lighty and the cronolog processes are automatically
+# restarted.

 /var/log/lighttpd/*log {
    rotate 5
@ -21,7 +30,6 @@
        /usr/bin/copr_log_hitcounter.py /var/log/lighttpd/access.log --ignore-subnets 172.25.144.0/20 209.132.184.33/24 &>>/var/log/copr-backend/hitcounter-logrotate.log || :
    endscript
    postrotate
-        /usr/bin/kill -HUP $(systemctl show --property MainPID --value lighttpd) || :
        /usr/bin/killall -HUP cronolog &>/dev/null || :
    endscript
 }