diff --git a/roles/web-data-analysis/files/condense-hotspot.cron b/roles/web-data-analysis/files/condense-hotspot.cron
deleted file mode 100644
index b4fe6d07c1..0000000000
--- a/roles/web-data-analysis/files/condense-hotspot.cron
+++ /dev/null
@@ -1 +0,0 @@
-0 07 * * * root /usr/local/bin/condense-hotspot.sh
diff --git a/roles/web-data-analysis/files/condense-hotspot.sh b/roles/web-data-analysis/files/condense-hotspot.sh
deleted file mode 100644
index 5ae1c06542..0000000000
--- a/roles/web-data-analysis/files/condense-hotspot.sh
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/bin/bash
-
-# This file is part of Fedora Project Infrastructure Ansible
-# Repository.
-#
-# Fedora Project Infrastructure Ansible Repository is free software:
-# you can redistribute it and/or modify it under the terms of the GNU
-# General Public License as published by the Free Software Foundation,
-# either version 3 of the License, or (at your option) any later
-# version.
-#
-# Fedora Project Infrastructure Ansible Repository is distributed in
-# the hope that it will be useful, but WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE. See the GNU General Public License for more
-# details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Fedora Project Infrastructure Ansible Repository. If
-# not, see <http://www.gnu.org/licenses/>.
-
-# There is a multiday delay involved in processing the logs. It may
-# take up to 4 days to get the logs to the main server, and another
-# day to combine all the logs onto combined-httpd. So we assume we
-# are 5 days behind.
-
-let NUMDAYS=5
-let OLDDAYS=$(( $NUMDAYS+1 ))
-
-PROJECT=hotspot
-WEBLOG=fedoraproject.org
-
-# This is the year/month/day for N days ago.
-YEAR=$(/bin/date -d "-${NUMDAYS} days" +%Y)
-MONTH=$(/bin/date -d "-${NUMDAYS} days" +%m)
-DAY=$(/bin/date -d "-${NUMDAYS} days" +%d)
-
-# And we have to deal with year/month/day boundaries for our later grep.
-OLDDATE=$(/bin/date -d "-${OLDDAYS} days" +%Y-%m-%d)
-OLDYEAR=$(/bin/date -d "-${OLDDAYS} days" +%Y)
-
-NFSDIR=/mnt/fedora_stats/combined-http
-TARGET=${NFSDIR}/${YEAR}/${MONTH}/${DAY}
-
-LOGFILE=${TARGET}/${WEBLOG}-access.log
-
-WORKDIR=/mnt/fedora_stats/data/${PROJECT}
-WORKFILE=${WORKDIR}/${YEAR}/${MONTH}/raw-${DAY}
-
-WEBDIR=/var/www/html/csv-reports/${PROJECT}
-
-TEMPDIR=$( mktemp -d /tmp/web-data-analysis.XXXXXXXXX )
-
-LBIN=/usr/local/bin/
-LSHARE=/usr/local/share/web-data-analysis
-
-mkdir -p ${WORKDIR}/${YEAR}/${MONTH}
-if [[ ! -f ${WORKDIR}/${YEAR}/out-${YEAR}-${MONTH} ]]; then
-    touch ${WORKDIR}/${YEAR}/out-${YEAR}-${MONTH}
-fi
-
-if [[ ! -f ${WORKDIR}/out-${YEAR} ]]; then
-    touch ${WORKDIR}/out-${YEAR}
-fi
-
-if [[ ! -f ${LOGFILE} ]]; then
-    echo "No logfile found for ${YEAR}/${MONTH}/${DAY}. Please fix."
-else
-    awk -f ${LSHARE}/${PROJECT}.awk ${LOGFILE} > ${WORKFILE}
-fi
-
-# The data isn't strictly within month boundaries, because the
-# logfiles end at 04:00 rather than 23:59. Log files can also get
-# stuck, leaving days or weeks of data in a single file. Because the
-# data is fairly small, we can get away with re-adding all of it
-# every day.
-
-find ${WORKDIR} -type f | grep raw- | xargs cat | sort -u | awk 'BEGIN{x=0; y=0}; {if (x != $1){ print x,y; x=$1; y=$2} else {y=y+$2}}' > ${WORKDIR}/worked-all
-
-awk -f ${LSHARE}/${PROJECT}-data.awk ${WORKDIR}/worked-all | grep -v "1970-01-01,0,0,0" | sort -u > ${WEBDIR}/${PROJECT}data-all.csv
-
-# Make the seven day moving average file
-#/usr/local/bin/hotspot-moving_avg.py > ${WEBDIR}/${PROJECT}data-all-7day-ma.csv
-
-gnuplot ${LSHARE}/${PROJECT}.gp
-
-# cleanup the temp data
-rm -rf ${TEMPDIR}
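For anyone replaying this pipeline against archived logs, the deleted script's date-window arithmetic is worth keeping. Below is a minimal standalone sketch of it, assuming GNU date (the script itself called /bin/date on the stats host); NUMDAYS and the paths are taken straight from the script above.

    #!/bin/bash
    # Sketch: the 5-day processing lag from condense-hotspot.sh, stand-alone.
    # NUMDAYS=5 because logs take up to 4 days to reach the main server,
    # plus a day to combine onto combined-httpd.
    NUMDAYS=5
    OLDDAYS=$(( NUMDAYS + 1 ))

    YEAR=$(date -d "-${NUMDAYS} days" +%Y)
    MONTH=$(date -d "-${NUMDAYS} days" +%m)
    DAY=$(date -d "-${NUMDAYS} days" +%d)

    # OLDDATE can land in the previous month or year, which is why the
    # script computed it as one string instead of reusing YEAR/MONTH/DAY.
    OLDDATE=$(date -d "-${OLDDAYS} days" +%Y-%m-%d)

    echo "log to process:  /mnt/fedora_stats/combined-http/${YEAR}/${MONTH}/${DAY}/fedoraproject.org-access.log"
    echo "day before that: ${OLDDATE}"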
diff --git a/roles/web-data-analysis/files/hotspot.awk b/roles/web-data-analysis/files/hotspot.awk
deleted file mode 100644
index f47da6958e..0000000000
--- a/roles/web-data-analysis/files/hotspot.awk
+++ /dev/null
@@ -1,95 +0,0 @@
-#
-# Take an apache log line such as
-#   83.163.161.147 - - [30/Sep/2012:13:54:19 +0000] "GET /static/hotspot.txt HTTP/1.1" 200 3 "-" "dnssec-trigger/0.11"
-# and convert it to
-#   1349013000 1
-
-function convertdate(str) {
-    gsub(/\[/, "", str)
-    gsub(/\]/, "", str)
-    split(str, a, ":");
-    split(a[1], b, "/");
-    temp="";
-    switch (b[2]) {
-        case "Jan": temp="01"; break;
-        case "Feb": temp="02"; break;
-        case "Mar": temp="03"; break;
-        case "Apr": temp="04"; break;
-        case "May": temp="05"; break;
-        case "Jun": temp="06"; break;
-        case "Jul": temp="07"; break;
-        case "Aug": temp="08"; break;
-        case "Sep": temp="09"; break;
-        case "Oct": temp="10"; break;
-        case "Nov": temp="11"; break;
-        case "Dec": temp="12"; break;
-        default:    temp="00"; break;
-    }
-    x = b[3] " " temp " " b[1] " " a[2] " " a[3] " " a[4]
-    # mktime gives epoch seconds; dividing by 300 buckets into 5-minute blocks
-    y = int(mktime(x)/300)
-    return y
-}
-
-BEGIN{
-    timestamp=0;
-    num_ts=0;
-    ts_hotspots=0;
-    total_hotspots=0;
-}
-
-# We assume that every 300 seconds a system will check in at least
-# once, because the NetworkManager addon does so. Convert the date
-# stamp to the nearest 5-minute block and add data to it. If the log
-# file goes backwards or jumps, this will produce multiple outputs
-# for the same timestamp; a later process has to deal with that. All
-# this does is output how many requests were seen in each block of
-# the log file.
-#
-$7 ~ /hotspot.txt/ && $6 ~ /GET/ {
-    date = convertdate($4)
-    if (timestamp != date) {
-        num_ts = num_ts + 1;
-        # timestamp is in 5-minute units; *300 turns it back into epoch seconds
-        print (timestamp*300), ts_hotspots
-        timestamp = date;
-        ts_hotspots = 1;
-    } else {
-        ts_hotspots = ts_hotspots + 1;
-        total_hotspots = total_hotspots + 1;
-    }
-}
-
-END {
-    num_ts = num_ts + 1;
-    print int(timestamp*300), ts_hotspots
-}
-
-## END OF FILE
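The bucketing in hotspot.awk is easiest to see on a single line. Here is a minimal sketch of the same logic, assuming GNU awk (mktime and switch are gawk extensions) and pinning TZ=UTC to match the +0000 stamps in the logs — the original script relied on the host timezone, so that pinning is an assumption. It reproduces the "1349013000" from the file's header comment:

    # Push the sample request from the header comment through the same logic.
    echo '83.163.161.147 - - [30/Sep/2012:13:54:19 +0000] "GET /static/hotspot.txt HTTP/1.1" 200 3 "-" "dnssec-trigger/0.11"' |
    TZ=UTC gawk '{
        gsub(/[\[\]]/, "", $4)            # strip the bracket from the date field
        split($4, a, ":")                 # a[1]=30/Sep/2012, a[2..4]=hh mm ss
        split(a[1], b, "/")               # b[1]=day, b[2]=month name, b[3]=year
        m = sprintf("%02d", (index("JanFebMarAprMayJunJulAugSepOctNovDec", b[2]) + 2) / 3)
        t = mktime(b[3] " " m " " b[1] " " a[2] " " a[3] " " a[4])
        print int(t / 300) * 300          # prints 1349013000: 13:54:19 floored to 13:50:00
    }'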
diff --git a/roles/web-data-analysis/files/hotspot.gp b/roles/web-data-analysis/files/hotspot.gp
deleted file mode 100644
index 948fcd48ba..0000000000
--- a/roles/web-data-analysis/files/hotspot.gp
+++ /dev/null
@@ -1,5 +0,0 @@
-"] \
-# '/var/www/html/csv-reports/hotspot/hotspotdata-all-7day-ma.csv' using 1:2 title 'Average every 5min' with lines lw 4, \
-# '/var/www/html/csv-reports/hotspot/hotspotdata-all-7day-ma.csv' using 1:3 title 'Least 5min' with lines lw 4, \
-# '/var/www/html/csv-reports/hotspot/hotspotdata-all-7day-ma.csv' using 1:4 title 'Max 5min' with lines lw 4
-# unset output
diff --git a/roles/web-data-analysis/tasks/main.yml b/roles/web-data-analysis/tasks/main.yml
index f32e1c427d..c58bf16a20 100644
--- a/roles/web-data-analysis/tasks/main.yml
+++ b/roles/web-data-analysis/tasks/main.yml
@@ -17,7 +17,7 @@
 
 - name: make the data subdirs
   file: path=/mnt/fedora_stats/data/{{item}} state=directory mode=0755
-  with_items: [hotspot, mirrors]
+  with_items: [mirrors]
   tags:
   - web-data
 
@@ -45,7 +45,7 @@
 
 - name: make the web subdirs
   file: path=/var/www/html/csv-reports/{{item}} state=directory mode=0755
-  with_items: [images, hotspot, mirrors]
+  with_items: [images, mirrors]
   tags:
   - web-data
 
@@ -56,7 +56,7 @@
 
 - name: scripts to condense data down for further processing
   copy: src={{item}} dest=/usr/local/bin/ mode=0755
-  with_items: [condense-mirrorlogs.sh, condense-hotspot.sh ]
+  with_items: [condense-mirrorlogs.sh]
   tags:
   - web-data
 
@@ -68,19 +68,19 @@
 
 - name: awk files for csv creation
   copy: src={{item}} dest=/usr/local/share/web-data-analysis mode=0644
-  with_items: [mirrors-data.awk, hotspot-data.awk, hotspot.awk ]
+  with_items: [mirrors-data.awk]
   tags:
   - web-data
 
 - name: gnuplot file for image creation
   copy: src={{item}} dest=/usr/local/share/web-data-analysis mode=0644
-  with_items: [ mirrors-data.gp, hotspot.gp ]
+  with_items: [mirrors-data.gp]
   tags:
   - web-data
 
 - name: daily cron file to run the log files
   copy: src={{item}} dest=/etc/cron.d/ mode=0644
-  with_items: [condense-mirrorlogs.cron, condense-hotspot.cron]
+  with_items: [condense-mirrorlogs.cron]
   tags:
   - web-data
   - cron
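One operational note: dropping these entries from with_items only stops the role from installing them; hosts provisioned earlier keep the old script, cron entry, awk/gnuplot files, and data. If a full cleanup is wanted, something like the hypothetical one-off sketch below (or equivalent file: state=absent tasks in the role) would do it. The paths come from the role above, but actually deleting the historical data directories is a judgment call.

    #!/bin/bash
    # One-off cleanup sketch (an assumption, not part of this change):
    # remove the pieces the role previously copied onto stats hosts.
    rm -f /etc/cron.d/condense-hotspot.cron
    rm -f /usr/local/bin/condense-hotspot.sh
    rm -f /usr/local/share/web-data-analysis/hotspot.awk \
          /usr/local/share/web-data-analysis/hotspot-data.awk \
          /usr/local/share/web-data-analysis/hotspot.gp
    # Accumulated data and published CSVs; archive first if the history
    # may still be wanted.
    #rm -rf /mnt/fedora_stats/data/hotspot
    #rm -rf /var/www/html/csv-reports/hotspot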