remove hotspot analysis from web-data-analysis. It has been broken for over six months with no requests to fix it

Stephen Smoogen 2022-01-11 05:42:47 -05:00
parent 8f6381c245
commit 0a7ac90878
5 changed files with 6 additions and 195 deletions

View file

@@ -1 +0,0 @@
0 07 * * * root /usr/local/bin/condense-hotspot.sh
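For context, this was a system-crontab entry, so it carries a user field between the schedule and the command; the job ran once a day at 07:00 as root:

# Field layout of the removed entry (standard system cron syntax):
# minute hour day-of-month month day-of-week user command
#   0     07       *         *        *      root /usr/local/bin/condense-hotspot.sh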

View file

@@ -1,88 +0,0 @@
#!/bin/bash
# This file is part of Fedora Project Infrastructure Ansible
# Repository.
#
# Fedora Project Infrastructure Ansible Repository is free software:
# you can redistribute it and/or modify it under the terms of the GNU
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later
# version.
#
# Fedora Project Infrastructure Ansible Repository is distributed in
# the hope that it will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License
# along with Fedora Project Infrastructure Ansible Repository. If
# not, see <http://www.gnu.org/licenses/>.
# There is a multiday delay involved in processing the logs. It
# may take up to 4 days to get the logs to the main-server. It may
# take a day to combine all the logs onto combined-httpd. So we assume
# we are 5 days behind.
let NUMDAYS=5
let OLDDAYS=$(( $NUMDAYS+1 ))
PROJECT=hotspot
WEBLOG=fedoraproject.org
# This is the year/month/day for N days ago.
YEAR=$(/bin/date -d "-${NUMDAYS} days" +%Y)
MONTH=$(/bin/date -d "-${NUMDAYS} days" +%m)
DAY=$(/bin/date -d "-${NUMDAYS} days" +%d)
# And we have to deal with year/month/day boundaries for our later grep.
OLDDATE=$(/bin/date -d "-${OLDDAYS} days" +%Y-%m-%d)
OLDYEAR=$(/bin/date -d "-${OLDDAYS} days" +%Y)
NFSDIR=/mnt/fedora_stats/combined-http
TARGET=${NFSDIR}/${YEAR}/${MONTH}/${DAY}
LOGFILE=${TARGET}/${WEBLOG}-access.log
WORKDIR=/mnt/fedora_stats/data/${PROJECT}
WORKFILE=${WORKDIR}/${YEAR}/${MONTH}/raw-${DAY}
WEBDIR=/var/www/html/csv-reports/${PROJECT}
TEMPDIR=$( mktemp -d /tmp/web-data-analysis.XXXXXXXXX )
LBIN=/usr/local/bin/
LSHARE=/usr/local/share/web-data-analysis
mkdir -p ${WORKDIR}/${YEAR}/${MONTH}
if [[ ! -f ${WORKDIR}/${YEAR}/out-${YEAR}-${MONTH} ]]; then
    touch ${WORKDIR}/${YEAR}/out-${YEAR}-${MONTH}
fi
if [[ ! -f ${WORKDIR}/out-${YEAR} ]]; then
    touch ${WORKDIR}/out-${YEAR}
fi
if [[ ! -f ${LOGFILE} ]]; then
    echo "No logfile found for ${YEAR}/${MONTH}/${DAY}. Please fix."
else
    awk -f ${LSHARE}/${PROJECT}.awk ${LOGFILE} > ${WORKFILE}
fi
# The data doesn't split cleanly across month boundaries because the
# log files end at 04:00 rather than 23:59. Log files can also get
# stuck, leaving days or weeks of data in a single file. Because the
# data is pretty small we can get away with re-adding it all every day.
find ${WORKDIR} -type f | grep raw- | xargs cat | sort -u | awk 'BEGIN{x=0; y=0}; {if (x != $1){ print x,y; x=$1; y=$2} else {y=y+$2}}' > ${WORKDIR}/worked-all
awk -f ${LSHARE}/${PROJECT}-data.awk ${WORKDIR}/worked-all | grep -v "1970-01-01,0,0,0" | sort -u > ${WEBDIR}/${PROJECT}data-all.csv
# Make the seven day moving average file
#/usr/local/bin/hotspot-moving_avg.py > ${WEBDIR}/${PROJECT}data-all-7day-ma.csv
gnuplot ${LSHARE}/${PROJECT}.gp
# cleanup the temp data
rm -rf ${TEMPDIR}
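A minimal sketch of the condense step above, run on toy data rather than the real raw- files: each raw file holds "unix_timestamp count" pairs, exact duplicate lines are dropped by sort -u, and the awk one-liner sums the counts that share a timestamp. (An END block is added here so the toy run also prints its final bucket; the original one-liner leaves the last group unprinted until newer data follows it on a later run.)

printf '%s\n' '1349013000 3' '1349013000 2' '1349013300 5' | sort -u | \
    awk 'BEGIN{x=0; y=0}
         {if (x != $1){ print x,y; x=$1; y=$2} else {y=y+$2}}
         END{print x,y}'
# output:
# 0 0
# 1349013000 5
# 1349013300 5

The stray "0 0" first line is likely why the next step greps the 1970-01-01 row out of the CSV.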

View file

@@ -1,95 +0,0 @@
#
# Take the apache log line
# 83.163.161.147 - - [30/Sep/2012:13:54:19 +0000] "GET /static/hotspot.txt HTTP/1.1" 200 3 "-" "dnssec-trigger/0.11"
# Convert to
# 1349013000 1
function convertdate(str) {
    gsub(/\[/, "", str)
    gsub(/\]/, "", str)
    split(str, a, ":");
    split(a[1], b, "/");
    temp="";
    switch (b[2]) {
        case "Jan":
            temp="01"
            break;
        case "Feb":
            temp="02"
            break;
        case "Mar":
            temp="03"
            break;
        case "Apr":
            temp="04"
            break;
        case "May":
            temp="05"
            break;
        case "Jun":
            temp="06"
            break;
        case "Jul":
            temp="07"
            break;
        case "Aug":
            temp="08"
            break;
        case "Sep":
            temp="09"
            break;
        case "Oct":
            temp="10"
            break;
        case "Nov":
            temp="11"
            break;
        case "Dec":
            temp="12"
            break;
        default:
            temp="00"
            break;
    }
    x = b[3] " " temp " " b[1] " " a[2] " " a[3] " " a[4]
    y = int(mktime(x)/300) # 300 seconds make 5 minutes (I NEED A GLOBAL VAR)
    return y
}
BEGIN{
    timestamp=0;
    num_ts = 0;
    ts_hotspots=0;
    total_hotspots=0;
}
#
# We assume that every 300 seconds a system will check in at least
# once, because the NetworkManager addon does so.
# Floor the date stamp to the start of its 5-minute block and add
# data to it. If the log file goes backwards or jumps around, this
# can produce multiple output lines for the same timestamp; a later
# process will need to deal with that. All this does is count how
# many hits it saw in each block of the log file.
#
$7 ~ /hotspot.txt/ && $6 ~ /GET/ {
    date = convertdate($4)
    if (timestamp != date) {
        num_ts = num_ts + 1;
        print (timestamp*300), ts_hotspots # GLOBAL VAR GOES HERE
        timestamp = date;
        ts_hotspots = 1;
    } else {
        ts_hotspots = ts_hotspots + 1;
        total_hotspots = total_hotspots + 1;
    }
}
END {
    num_ts = num_ts + 1;
    print int(timestamp*300), ts_hotspots # LOOK GLOBAL VAR AGAIN
}
## END OF FILE
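A quick check of the bucketing math using the sample line from the header comment: 30/Sep/2012:13:54:19 +0000 is epoch 1349013259, and flooring with int(epoch/300) gives the 5-minute bucket index that the script later multiplies back up by 300.

echo 1349013259 | awk '{ print int($1/300), int($1/300)*300 }'
# -> 4496710 1349013000   (bucket index, bucket start; matches the header comment)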

View file

@@ -1,5 +0,0 @@
"] \
# '/var/www/html/csv-reports/hotspot/hotspotdata-all-7day-ma.csv' using 1:2 title 'Average every 5min' with lines lw 4, \
# '/var/www/html/csv-reports/hotspot/hotspotdata-all-7day-ma.csv' using 1:3 title 'Least 5min' with lines lw 4, \
# '/var/www/html/csv-reports/hotspot/hotspotdata-all-7day-ma.csv' using 1:4 title 'Max 5min' with lines lw 4
# unset output