remove hotspot analysis from web-data-analysis. It has been broken for over six months with no requests to fix it
parent 8f6381c245
commit 0a7ac90878
5 changed files with 6 additions and 195 deletions
@@ -1 +0,0 @@
0 07 * * * root /usr/local/bin/condense-hotspot.sh
@@ -1,88 +0,0 @@
#!/bin/bash

# This file is part of Fedora Project Infrastructure Ansible
# Repository.
#
# Fedora Project Infrastructure Ansible Repository is free software:
# you can redistribute it and/or modify it under the terms of the GNU
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later
# version.
#
# Fedora Project Infrastructure Ansible Repository is distributed in
# the hope that it will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License
# along with Fedora Project Infrastructure Ansible Repository. If
# not, see <http://www.gnu.org/licenses/>.

# There is a multiday delay involved in processing the logs. It
# may take up to 4 days to get the logs to the main-server. It may
# take a day to combine all the logs onto combined-httpd. So we assume
# we are 5 days behind.

let NUMDAYS=5
let OLDDAYS=$(( $NUMDAYS+1 ))

PROJECT=hotspot
WEBLOG=fedoraproject.org

# This is the year/month/day for N days ago.
YEAR=$(/bin/date -d "-${NUMDAYS} days" +%Y)
MONTH=$(/bin/date -d "-${NUMDAYS} days" +%m)
DAY=$(/bin/date -d "-${NUMDAYS} days" +%d)

# And we have to deal with year/month/day boundaries for our later grep.
OLDDATE=$(/bin/date -d "-${OLDDAYS} days" +%Y-%m-%d)
OLDYEAR=$(/bin/date -d "-${OLDDAYS} days" +%Y)

NFSDIR=/mnt/fedora_stats/combined-http
TARGET=${NFSDIR}/${YEAR}/${MONTH}/${DAY}

LOGFILE=${TARGET}/${WEBLOG}-access.log

WORKDIR=/mnt/fedora_stats/data/${PROJECT}
WORKFILE=${WORKDIR}/${YEAR}/${MONTH}/raw-${DAY}

WEBDIR=/var/www/html/csv-reports/${PROJECT}

TEMPDIR=$( mktemp -d /tmp/web-data-analysis.XXXXXXXXX )

LBIN=/usr/local/bin/
LSHARE=/usr/local/share/web-data-analysis

mkdir -p ${WORKDIR}/${YEAR}/${MONTH}
if [[ ! -f ${WORKDIR}/${YEAR}/out-${YEAR}-${MONTH} ]]; then
    touch ${WORKDIR}/${YEAR}/out-${YEAR}-${MONTH}
fi

if [[ ! -f ${WORKDIR}/out-${YEAR} ]]; then
    touch ${WORKDIR}/out-${YEAR}
fi

if [[ ! -f ${LOGFILE} ]]; then
    echo "No logfile found for ${YEAR}/${MONTH}/${DAY}. Please fix."
else
    awk -f ${LSHARE}/${PROJECT}.awk ${LOGFILE} > ${WORKFILE}
fi

# The data isn't strictly within month boundaries, because the logfiles
# end at 04:00 rather than 23:59, and a log file can get stuck so that
# days or weeks of data end up in a single file. Because the data is
# pretty small we can get away with re-adding all of it every day.

find ${WORKDIR} -type f | grep raw- | xargs cat | sort -u | awk 'BEGIN{x=0; y=0}; {if (x != $1){ print x,y; x=$1; y=$2} else {y=y+$2}}' > ${WORKDIR}/worked-all

awk -f ${LSHARE}/${PROJECT}-data.awk ${WORKDIR}/worked-all | grep -v "1970-01-01,0,0,0" | sort -u > ${WEBDIR}/${PROJECT}data-all.csv

# Make the seven day moving average file
#/usr/local/bin/hotspot-moving_avg.py > ${WEBDIR}/${PROJECT}data-all-7day-ma.csv

gnuplot ${LSHARE}/${PROJECT}.gp

# cleanup the temp data
rm -rf ${TEMPDIR}
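The find/sort/awk pipeline in the removed script is what let the job re-add every raw-* file each day: duplicate 5-minute buckets coming from overlapping raw files are collapsed by summing their counts. A minimal sketch of that behaviour, using made-up bucket/count pairs rather than real data:

# two raw files both saw bucket 4496710; their counts get merged into one row
printf '4496710 3\n4496710 2\n4496711 7\n' | sort -u \
    | awk 'BEGIN{x=0; y=0}; {if (x != $1){ print x,y; x=$1; y=$2} else {y=y+$2}}'
# output:
#   0 0
#   4496710 5

The spurious leading "0 0" row appears to be what the later grep -v "1970-01-01,0,0,0" step filtered out, and the last bucket is never flushed; both are quirks of the original one-liner, not of this sketch.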
@@ -1,95 +0,0 @@
#
# Take the apache log line
# 83.163.161.147 - - [30/Sep/2012:13:54:19 +0000] "GET /static/hotspot.txt HTTP/1.1" 200 3 "-" "dnssec-trigger/0.11"
# Convert to
# 1349013000 1
#

function convertdate(str) {
    gsub(/\[/, "", str)
    gsub(/\]/, "", str)
    split(str,a,":");
    split(a[1],b,"/");
    temp="";
    switch (b[2]) {
    case "Jan":
        temp="01"
        break;
    case "Feb":
        temp="02"
        break;
    case "Mar":
        temp="03"
        break;
    case "Apr":
        temp="04"
        break;
    case "May":
        temp="05"
        break;
    case "Jun":
        temp="06"
        break;
    case "Jul":
        temp="07"
        break;
    case "Aug":
        temp="08"
        break;
    case "Sep":
        temp="09"
        break;
    case "Oct":
        temp="10"
        break;
    case "Nov":
        temp="11"
        break;
    case "Dec":
        temp="12"
        break;
    default:
        temp="00"
        break;
    }
    x=b[3]" "temp" "b[1]" "a[2]" "a[3]" "a[4]
    y=int(mktime(x)/300) # 300 seconds make 5 minutes (I NEED A GLOBAL VAR)
    return y
}

BEGIN{
    timestamp=0;
    num_ts = 0;
    ts_hotspots=0;
    total_hotspots=0;
}

#
# We assume that every 300 seconds a system will show up in the log at
# least once, because the NetworkManager addon does so.
# Convert our date stamp to the nearest 5 minute block and add data to
# it. If the log file goes backwards or jumps etc this will mean
# multiple outputs for a timestamp. A later process will need to deal
# with that. All this will do is output how many it saw at that block
# in the log file.
#

$7 ~/hotspot.txt/ && $6 ~/GET/ {
    date = convertdate($4)
    if (timestamp != date) {
        num_ts = num_ts +1;
        print (timestamp*300),ts_hotspots # GLOBAL VAR GOES HERE
        timestamp = date;
        ts_hotspots = 1;
    } else {
        ts_hotspots = ts_hotspots +1;
        total_hotspots = total_hotspots +1;
    }
}

END {
    num_ts = num_ts +1;
    print int(timestamp*300),ts_hotspots # LOOK GLOBAL VAR AGAIN
}

## END OF FILE
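As a worked example (not part of the removed file) of the 5-minute bucketing that convertdate() performs, take the sample log line from the file's header comment, assuming GNU date on a machine whose clock is UTC, since mktime() uses the local timezone:

ts=$(date -u -d '2012-09-30 13:54:19' +%s)   # 1349013259
bucket=$(( ts / 300 ))                       # 4496710, integer division into 5-minute blocks
echo $(( bucket * 300 ))                     # 1349013000

Multiplying the bucket index back out by 300 in the print statements is what produces the "1349013000 1" style rows described at the top of the file.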
@@ -1,5 +0,0 @@
"] \
# '/var/www/html/csv-reports/hotspot/hotspotdata-all-7day-ma.csv' using 1:2 title 'Average every 5min' with lines lw 4, \
# '/var/www/html/csv-reports/hotspot/hotspotdata-all-7day-ma.csv' using 1:3 title 'Least 5min' with lines lw 4, \
# '/var/www/html/csv-reports/hotspot/hotspotdata-all-7day-ma.csv' using 1:4 title 'Max 5min' with lines lw 4
# unset output
@@ -17,7 +17,7 @@

 - name: make the data subdirs
   file: path=/mnt/fedora_stats/data/{{item}} state=directory mode=0755
-  with_items: [hotspot, mirrors]
+  with_items: [mirrors]
   tags:
   - web-data

@@ -45,7 +45,7 @@

 - name: make the web subdirs
   file: path=/var/www/html/csv-reports/{{item}} state=directory mode=0755
-  with_items: [images, hotspot, mirrors]
+  with_items: [images, mirrors]
   tags:
   - web-data

@@ -56,7 +56,7 @@

 - name: scripts to condense data down for further processing
   copy: src={{item}} dest=/usr/local/bin/ mode=0755
-  with_items: [condense-mirrorlogs.sh, condense-hotspot.sh ]
+  with_items: [condense-mirrorlogs.sh ]
   tags:
   - web-data

@@ -68,19 +68,19 @@

 - name: awk files for csv creation
   copy: src={{item}} dest=/usr/local/share/web-data-analysis mode=0644
-  with_items: [mirrors-data.awk, hotspot-data.awk, hotspot.awk ]
+  with_items: [mirrors-data.awk, ]
   tags:
   - web-data

 - name: gnuplot file for image creation
   copy: src={{item}} dest=/usr/local/share/web-data-analysis mode=0644
-  with_items: [ mirrors-data.gp, hotspot.gp ]
+  with_items: [ mirrors-data.gp]
   tags:
   - web-data

 - name: daily cron file to run the log files
   copy: src={{item}} dest=/etc/cron.d/ mode=0644
-  with_items: [condense-mirrorlogs.cron, condense-hotspot.cron]
+  with_items: [condense-mirrorlogs.cron]
   tags:
   - web-data
   - cron