and we have the initial items for a getfedora daily stats

This commit is contained in:
parent 5ddbf54811
commit bcb3611abb

7 changed files with 348 additions and 15 deletions
roles/web-data-analysis/files/condense-getfedoralogs.cron (new file, 1 line)
@@ -0,0 +1 @@
0 07 * * * root /usr/local/bin/condense-getfedoralogs.sh
roles/web-data-analysis/files/condense-getfedoralogs.sh (new file, 90 lines)
@@ -0,0 +1,90 @@
#!/bin/bash
# This file is part of Fedora Project Infrastructure Ansible
# Repository.
#
# Fedora Project Infrastructure Ansible Repository is free software:
# you can redistribute it and/or modify it under the terms of the GNU
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later
# version.
#
# Fedora Project Infrastructure Ansible Repository is distributed in
# the hope that it will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License
# along with Fedora Project Infrastructure Ansible Repository. If
# not, see <http://www.gnu.org/licenses/>.

# There is a multiday delay involved in processing the logs. It
# may take up to 4 days to get the logs to the main-server. It may
# take a day to combine all the logs onto combined-httpd. So we assume
# we are 5 days behind.

let NUMDAYS=5
let OLDDAYS=$(( $NUMDAYS+1 ))

PROJECT=getfedora
WEBLOG=${PROJECT}.org

# This is the year/month/day for N days ago.
YEAR=$(/bin/date -d "-${NUMDAYS} days" +%Y)
MONTH=$(/bin/date -d "-${NUMDAYS} days" +%m)
DAY=$(/bin/date -d "-${NUMDAYS} days" +%d)

# And we have to deal with year/month/day boundaries for our later grep.
OLDDATE=$(/bin/date -d "-${OLDDAYS} days" +%Y-%m-%d)
OLDYEAR=$(/bin/date -d "-${OLDDAYS} days" +%Y)

NFSDIR=/mnt/fedora_stats/combined-http
TARGET=${NFSDIR}/latest

LOGFILE=${TARGET}/${WEBLOG}-access.log

WORKDIR=/mnt/fedora_stats/data/${PROJECT}
WORKFILE=${WORKDIR}/${YEAR}/${MONTH}/worked-${DAY}

WEBDIR=/var/www/html/csv-reports/${PROJECT}

TEMPDIR=$( mktemp -d /tmp/web-data-analysis.XXXXXXXXX )

LBIN=/usr/local/bin/
LSHARE=/usr/local/share/web-data-analysis

mkdir -p ${WORKDIR}/${YEAR}/${MONTH}

if [[ ! -f ${LOGFILE} ]]; then
    echo "No logfile found for ${YEAR}/${MONTH}/${DAY}. Please fix."
elif [[ -f ${WORKFILE} ]]; then
    echo "The workfile for ${YEAR}/${MONTH}/${DAY} already existed."
else
    egrep -iv 'slurp|bot|yandex|spider|crawler|check_http' ${LOGFILE} | awk -f ${LSHARE}/${PROJECT}.awk > ${WORKFILE}
    sort -o ${WORKFILE} -u ${WORKFILE}
fi

# So the data isn't strictly across month boundaries due to the end of
# the logfiles being at 04:00 versus 23:59. Also log files might get
# stuck and you end up with days or weeks of data in a single
# file. Doing a continual sort clears that up.

sort -o ${WORKDIR}/${YEAR}/out-${YEAR}-${MONTH} -S 4G -u -m ${WORKDIR}/${YEAR}/out-${YEAR}-${MONTH} ${WORKFILE}
sort -o ${WORKDIR}/out-${YEAR} -S 4G -u -m ${WORKDIR}/${YEAR}/out-${YEAR}-${MONTH}

# Because the logs stop at 04:00 we can only get 24 hours from 6 days before.
egrep "${OLDDATE}" ${WORKDIR}/out-${OLDYEAR} > ${TEMPDIR}/watched-day

# Grab the data and put it in the two files. This makes it a lot
# faster to process as a whole year may take an hour to go through.

for i in ${OLDYEAR} all; do
    awk -f ${LSHARE}/${PROJECT}-data.awk ${TEMPDIR}/watched-day >> ${WEBDIR}/${PROJECT}data-${i}.csv
    sort -o ${WEBDIR}/${PROJECT}data-${i}.csv -u ${WEBDIR}/${PROJECT}data-${i}.csv
done

gnuplot ${LSHARE}/${PROJECT}-data.gp

# cleanup the temp data
rm -rf ${TEMPDIR}
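The two date offsets above are what keep the rollups coherent across month and year boundaries: the workfile is keyed to the day being condensed (NUMDAYS back), the reporting grep pulls the previous day (OLDDAYS back) out of the yearly file, and sort -u -m merges each new workfile into the monthly and yearly files without duplicating lines. A minimal sketch of that date logic, using a fixed run date purely for illustration (not anything from the commit):

    #!/bin/bash
    # Sketch only: same offsets as condense-getfedoralogs.sh, hard-coded run date.
    RUNDATE="2016-01-03"
    NUMDAYS=5
    OLDDAYS=$(( NUMDAYS + 1 ))
    YEAR=$(date -d "${RUNDATE} -${NUMDAYS} days" +%Y)           # 2015
    MONTH=$(date -d "${RUNDATE} -${NUMDAYS} days" +%m)          # 12
    DAY=$(date -d "${RUNDATE} -${NUMDAYS} days" +%d)            # 29
    OLDDATE=$(date -d "${RUNDATE} -${OLDDAYS} days" +%Y-%m-%d)  # 2015-12-28
    OLDYEAR=$(date -d "${RUNDATE} -${OLDDAYS} days" +%Y)        # 2015
    # The script would write .../2015/12/worked-29 but report 2015-12-28 from out-2015,
    # which is why OLDDATE/OLDYEAR are computed separately from YEAR/MONTH/DAY.
    echo "workfile: ${YEAR}/${MONTH}/worked-${DAY}; reported day: ${OLDDATE} from out-${OLDYEAR}"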
@@ -27,6 +27,9 @@
 let NUMDAYS=5
 let OLDDAYS=$(( $NUMDAYS+1 ))
 
+PROJECT=getfedora
+WEBLOG=${PROJECT}.org
+
 # This is the year/month/day for N days ago.
 YEAR=$(/bin/date -d "-${NUMDAYS} days" +%Y)
 MONTH=$(/bin/date -d "-${NUMDAYS} days" +%m)

@@ -39,15 +42,17 @@ OLDYEAR=$(/bin/date -d "-${OLDDAYS} days" +%Y)
 NFSDIR=/mnt/fedora_stats/combined-http
 TARGET=${NFSDIR}/latest
 
-LOGFILE=${TARGET}/mirrors.fedoraproject.org-access.log
+LOGFILE=${TARGET}/${WEBLOG}-access.log
 
-WORKDIR=/mnt/fedora_stats/data/mirrors
+WORKDIR=/mnt/fedora_stats/data/${PROJECT}
 WORKFILE=${WORKDIR}/${YEAR}/${MONTH}/worked-${DAY}
 
-WEBDIR=/var/www/html/csv-reports/mirrors
+WEBDIR=/var/www/html/csv-reports/${PROJECT}
 
 TEMPDIR=$( mktemp -d /tmp/web-data-analysis.XXXXXXXXX )
 
+LBIN=/usr/local/bin/
+LSHARE=/usr/local/share/web-data-analysis
 
 mkdir -p ${WORKDIR}/${YEAR}/${MONTH}
 

@@ -56,7 +61,7 @@ if [[ ! -f ${LOGFILE} ]]; then
 elif [[ -f ${WORKFILE} ]]; then
     echo "The workfile for ${YEAR}/${MONTH}/${DAY} already existed."
 else
-    /usr/local/bin/mirrorlist.py -o ${WORKFILE} ${LOGFILE};
+    ${LBIN}/mirrorlist.py -o ${WORKFILE} ${LOGFILE};
 fi
 
 # So the data isn't strictly across month boundaries due to the end of

@@ -74,11 +79,11 @@ egrep "${OLDDATE}" ${WORKDIR}/out-${OLDYEAR} > ${TEMPDIR}/watched-day
 # faster to process as a whole year may take an hour to go through.
 
 for i in ${OLDYEAR} all; do
-    awk -f /usr/local/share/web-data-analysis/mirror-data.awk ${TEMPDIR}/watched-day >> ${WEBDIR}/mirrordata-${i}.csv
-    sort -o ${WEBDIR}/mirrordata-${i}.csv -u ${WEBDIR}/mirrordata-${i}.csv
+    awk -f ${LSHARE}/${PROJECT}-data.awk ${TEMPDIR}/watched-day >> ${WEBDIR}/${PROJECT}data-${i}.csv
+    sort -o ${WEBDIR}/${PROJECT}data-${i}.csv -u ${WEBDIR}/${PROJECT}data-${i}.csv
 done
 
-gnuplot /usr/local/share/web-data-analysis/mirror-data.gp
+gnuplot ${LSHARE}/${PROJECT}-data.gp
 
 # cleanup the temp data
 rm -rf ${TEMPDIR}
roles/web-data-analysis/files/getfedora-data.awk (new file, 106 lines)
@@ -0,0 +1,106 @@
BEGIN{
    olddate="1970-01-01"
    dt = 0;
    # edition
    atomic=0;
    cloud=0;
    server=0;
    workstation=0;
    unk_edt=0;
    # release
    f21=0;
    f22=0;
    f23=0;
    f24=0;
    f25=0;
    f26=0;
    f27=0;
    f28=0;
    f29=0;
    unk_rel=0;
    # arch
    arm_32=0;
    arm_64=0;
    x86_32=0;
    x86_64=0;
    ppc_le=0;
    ppc_he=0;
    s390x=0;
    unk_arc=0;
    # additional data
    netinstall=0;
    print olddate ",dt,atomic,cloud,server,workstation,unk_edt,f21,f22,f23,f24,f25,f26,f27,f28,f29,unk_rel,arm_32,arm_64,ppc_le,ppc_he,s390x,x86_32,x86_64,unk_arc,netinstall"
}

{
    if ($1 == olddate) {
        if (($3 ~/\.x86_64\./) || ($3 ~/-x86_64-/)) { x86_64 = x86_64 +1; }
        else if (($3 ~/\.i686\./) || ($3 ~/-i686-/) || ($3 ~/\.i386\./) || ($3 ~/-i386-/)) { x86_32 = x86_32 +1; }
        else if (($3 ~/\.armhfp\./) || ($3 ~/-armhfp-/)) { arm_32 = arm_32 +1; }
        else if (($3 ~/\.aarch64\./) || ($3 ~/-aarch64-/)) { arm_64 = arm_64 +1; }
        else if (($3 ~/\.ppc64le\./) || ($3 ~/-ppc64le-/)) { ppc_le = ppc_le +1; }
        else if (($3 ~/\.ppc64\./) || ($3 ~/-ppc64-/)) { ppc_he = ppc_he +1; }
        else if (($3 ~/\.s390x\./) || ($3 ~/-s390x-/)) { s390x = s390x +1; }
        else { unk_arc = unk_arc +1 };

        if (($3 ~/-21\./) || ($3 ~/-21-/)) { f21 = f21 + 1 }
        else if (($3 ~/-22\./) || ($3 ~/-22-/)) { f22 = f22 + 1 }
        else if (($3 ~/-23\./) || ($3 ~/-23-/)) { f23 = f23 + 1 }
        else if (($3 ~/-24\./) || ($3 ~/-24-/)) { f24 = f24 + 1 }
        else if (($3 ~/-25\./) || ($3 ~/-25-/)) { f25 = f25 + 1 }
        else if (($3 ~/-26\./) || ($3 ~/-26-/)) { f26 = f26 + 1 }
        else if (($3 ~/-27\./) || ($3 ~/-27-/)) { f27 = f27 + 1 }
        else if (($3 ~/-28\./) || ($3 ~/-28-/)) { f28 = f28 + 1 }
        else if (($3 ~/-29\./) || ($3 ~/-29-/)) { f29 = f29 + 1 }
        else { unk_rel = unk_rel +1 }

        if (($3 ~/Cloud-Atomic/) || ($3 ~/Cloud_Atomic/)) { atomic = atomic +1 ; dt = dt +1 }
        else if (($3 ~/Cloud-Base/) || ($3 ~/Cloud_Base/)) { cloud = cloud +1 ; dt = dt +1 }
        else if (($3 ~/Server-DVD/) || ($3 ~/Server_DVD/)) { server = server +1; dt = dt +1 }
        else if (($3 ~/Server-netinst/) || ($3 ~/Server_netinst/)) { server = server +1; netinstall = netinstall +1 ; dt = dt +1 }
        else if (($3 ~/Workstation-netinst/) || ($3 ~/Workstation_netinst/)) { workstation = workstation +1; netinstall = netinstall +1; dt = dt +1 }
        else if (($3 ~/Live-Workstation/) || ($3 ~/Live_Workstation/)) { workstation = workstation +1; dt = dt +1 }
        else { unk_edt = unk_edt + 1; dt = dt +1 }

    } else {
        print olddate "," dt "," atomic "," cloud "," server "," workstation "," unk_edt "," f21 "," f22 "," f23 "," f24 "," f25 "," f26 "," f27 "," f28 "," f29 "," unk_rel "," arm_32 "," arm_64 "," ppc_le "," ppc_he "," s390x "," x86_32 "," x86_64 "," unk_arc "," netinstall
        olddate=$1
        dt = 0;
        # edition
        atomic=0;
        cloud=0;
        server=0;
        workstation=0;
        unk_edt=0;
        # release
        f21=0;
        f22=0;
        f23=0;
        f24=0;
        f25=0;
        f26=0;
        f27=0;
        f28=0;
        f29=0;
        unk_rel=0;
        # arch
        arm_32=0;
        arm_64=0;
        x86_32=0;
        x86_64=0;
        ppc_le=0;
        ppc_he=0;
        s390x=0;
        unk_arc=0;
        # additional data
        netinstall=0;
    }
}

END {
    print olddate "," dt "," atomic "," cloud "," server "," workstation "," unk_edt "," f21 "," f22 "," f23 "," f24 "," f25 "," f26 "," f27 "," f28 "," f29 "," unk_rel "," arm_32 "," arm_64 "," ppc_le "," ppc_he "," s390x "," x86_32 "," x86_64 "," unk_arc "," netinstall
}
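getfedora-data.awk reads the sorted per-day workfile lines (date, IP, image name) and emits one CSV row per day, with the column order fixed by the header printed in BEGIN. A small illustrative run, with made-up IPs and image names chosen only to exercise the arch, release, and edition branches (note that, as written, the record that switches olddate to a new date triggers the print/reset but is not itself counted):

    # Hypothetical input; real workfiles come from getfedora.awk output.
    printf '%s\n' \
      '2015-12-28 192.0.2.1 Fedora-Server-DVD-x86_64-23.iso' \
      '2015-12-28 192.0.2.2 Fedora-Live-Workstation-x86_64-23-10.iso' \
      '2015-12-28 192.0.2.3 Fedora-Cloud-Base-20151215.x86_64.qcow2' \
      | awk -f getfedora-data.awk
    # Expected output: the header row, an all-zero row for the 1970-01-01 seed date,
    # then 2015-12-28 with dt=2, cloud=1, workstation=1, f23=1, unk_rel=1, x86_64=2.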
roles/web-data-analysis/files/getfedora-data.gp (new file, 53 lines)
@@ -0,0 +1,53 @@
set grid
set xdata time
set format x "%Y-%m-%d"
set timefmt "%Y-%m-%d"

set datafile separator ","
set term png size 1600,1200

##
set output "getfedora-editions.png"
set title "Daily Editions Total"
plot ["2014-12-03":"2016-01-18"] \
    'getfedora-all.csv' using 1:2 title 'Total number' with lines lw 4,\
    'getfedora-all.csv' using 1:3 title 'Atomic' with lines lw 4,\
    'getfedora-all.csv' using 1:4 title 'Cloud' with lines lw 4,\
    'getfedora-all.csv' using 1:5 title 'Server' with lines lw 4,\
    'getfedora-all.csv' using 1:6 title 'Workstation' with lines lw 4,\
    'getfedora-all.csv' using 1:26 title 'Netinstall' with lines lw 4,\
    'getfedora-all.csv' using 1:7 title 'Unknown' with lines lw 4
unset output

##
set output "getfedora-versions.png"
set title "Daily Version Totals"
plot ["2015-01-01":"2015-12-31"] \
    'getfedora-all.csv' using 1:2 title 'Total number' with lines lw 4,\
    'getfedora-all.csv' using 1:8 title 'Fedora-21' with lines lw 4,\
    'getfedora-all.csv' using 1:9 title 'Fedora-22' with lines lw 4,\
    'getfedora-all.csv' using 1:10 title 'Fedora-23' with lines lw 4,\
    'getfedora-all.csv' using 1:11 title 'Fedora-24' with lines lw 4,\
    'getfedora-all.csv' using 1:12 title 'Fedora-25' with lines lw 4,\
    'getfedora-all.csv' using 1:13 title 'Fedora-26' with lines lw 4,\
    'getfedora-all.csv' using 1:14 title 'Fedora-27' with lines lw 4,\
    'getfedora-all.csv' using 1:15 title 'Fedora-28' with lines lw 4,\
    'getfedora-all.csv' using 1:16 title 'Fedora-29' with lines lw 4,\
    'getfedora-all.csv' using 1:17 title 'Unknown' with lines lw 4
unset output

##
set output "getfedora-arch.png"
set title "Daily Architectures Totals"
plot ["2015-01-01":"2015-12-31"] \
    'getfedora-all.csv' using 1:2 title 'Total number' with lines lw 4,\
    'getfedora-all.csv' using 1:18 title 'arm_32' with lines lw 4,\
    'getfedora-all.csv' using 1:19 title 'arm_64' with lines lw 4,\
    'getfedora-all.csv' using 1:20 title 'ppc_le' with lines lw 4,\
    'getfedora-all.csv' using 1:21 title 'ppc_he' with lines lw 4,\
    'getfedora-all.csv' using 1:22 title 's390x' with lines lw 4,\
    'getfedora-all.csv' using 1:23 title 'x86_32' with lines lw 4,\
    'getfedora-all.csv' using 1:24 title 'x86_64' with lines lw 4,\
    'getfedora-all.csv' using 1:25 title 'unknown' with lines lw 4
unset output
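The "using 1:N" column indices in these plots line up with the CSV header that getfedora-data.awk prints: column 2 is the daily total dt, 3-7 the editions, 8-17 the releases, 18-25 the architectures (each group ending in an unknown bucket), and 26 netinstall. The .gp file refers to 'getfedora-all.csv' by a relative path, so gnuplot needs to be started in the directory holding that CSV. A sketch of running the plots by hand, with the directory taken from the shell script's WEBDIR rather than from any documented invocation:

    # Sketch: regenerate the PNGs manually from the report directory.
    cd /var/www/html/csv-reports/getfedora
    gnuplot /usr/local/share/web-data-analysis/getfedora-data.gp
    # Writes getfedora-editions.png, getfedora-versions.png and getfedora-arch.png into the cwd.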
roles/web-data-analysis/files/getfedora.awk (new file, 74 lines)
@@ -0,0 +1,74 @@
#
# Take the apache log line
# 123.115.133.104 - - [01/Jan/2015:04:02:01 +0000] "GET /zh_CN/server/download/server-download-splash?file=http://download.fedoraproject.org/pub/fedora/linux/releases/21/Server/i386/iso/Fedora-Server-DVD-i386-21.iso HTTP/1.1" 200 4355 "https://getfedora.org/zh_CN/server/download/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
# Convert to
# 2015-01-01 123.115.133.104 Fedora-Server-DVD-i386-21.iso

function convertdate(str) {
    gsub(/\[/, "", str)
    split(str,a,":");
    split(a[1],b,"/");
    temp="";
    switch (b[2]) {
    case "Jan":
        temp="01"
        break;
    case "Feb":
        temp="02"
        break;
    case "Mar":
        temp="03"
        break;
    case "Apr":
        temp="04"
        break;
    case "May":
        temp="05"
        break;
    case "Jun":
        temp="06"
        break;
    case "Jul":
        temp="07"
        break;
    case "Aug":
        temp="08"
        break;
    case "Sep":
        temp="09"
        break;
    case "Oct":
        temp="10"
        break;
    case "Nov":
        temp="11"
        break;
    case "Dec":
        temp="12"
        break;
    default:
        temp="00"
        break;
    }
    foo=substr(a[3],1,1);
    return b[3]"-"temp"-"b[1]
}

function getimage(str) {
    if (str ~/=/) {
        split(str,a,"=");
        x=split(a[2],b,"/");
        return b[x]
    } else {
        x=split(str,b,"/");
        return b[x]
    }
}

$7 ~/\.qcow2$|\.iso$|\.raw\.xz$|\.box$/ && $6 ~/GET/ && $9 ~/302|200/ {
    date = convertdate($4)
    iso = getimage($7)
    ip = $1
    print date, ip, iso
}
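getfedora.awk is the first stage of the pipeline: it turns raw access-log lines into the date/IP/image triples the other scripts consume. Its convertdate() uses a switch statement, which is a GNU awk extension, so it needs gawk. Running it by hand on the sample request from its own header comment (the log file name here is illustrative):

    # sample.log holds the one-line request quoted in the comment above.
    awk -f getfedora.awk sample.log
    # Prints: 2015-01-01 123.115.133.104 Fedora-Server-DVD-i386-21.iso
    # ($7 ends in .iso, $6 matches GET, $9 is 200, so the rule fires;
    #  convertdate() maps [01/Jan/2015:... to 2015-01-01 and getimage()
    #  keeps the last path component after the file= parameter.)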
roles/web-data-analysis/tasks/main.yml
@@ -32,28 +32,32 @@
   tags:
   - web-data
 
-- name: script to run the daily mirror log condenser
-  copy: src=condense-mirrorlogs.sh dest=/usr/local/bin/ mode=0755
+- name: scripts to condense data down for further processing
+  copy: src={{item}} dest=/usr/local/bin/ mode=0755
+  with_items: [condense-mirrorlogs.sh, condense-getfedoralogs.sh]
   tags:
   - web-data
 
-- name: python script to calculate mirrorlist
+- name: python scripts to calculate various data
   copy: src=mirrorlist.py dest=/usr/local/bin mode=0755
   tags:
   - web-data
 
-- name: awk file for csv creation
-  copy: src=mirror-data.awk dest=/usr/local/share/web-data-analysis mode=0644
+- name: awk files for csv creation
+  copy: src={{item}} dest=/usr/local/share/web-data-analysis mode=0644
+  with_items: [mirror-data.awk, getfedora-data.awk]
   tags:
   - web-data
 
-- name: gnuplot file for csv creation
-  copy: src=mirror-data.gp dest=/usr/local/share/web-data-analysis mode=0644
+- name: gnuplot file for image creation
+  copy: src={{item}} dest=/usr/local/share/web-data-analysis mode=0644
+  with_items: [ mirror-data.gp, getfedora-data.gp ]
   tags:
   - web-data
 
 - name: daily cron file to run the log files
-  copy: src=condense-mirrorlogs.cron dest=/etc/cron.d/ mode=0644
+  copy: src={{item}} dest=/etc/cron.d/ mode=0644
+  with_items: [condense-mirrorlogs.cron, condense-getfedoralogs.cron]
   tags:
   - web-data
   - cron
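With the with_items loops, each copy task now distributes both the mirror and the getfedora variant of its file, and the existing web-data and cron tags still select them. A hedged sketch of pushing just these tasks out, assuming the role is applied from a playbook (the playbook name below is hypothetical, not from the commit):

    # Hypothetical invocation; adjust the playbook and limits to the actual infra setup.
    ansible-playbook web-data-analysis.yml --tags web-data,cron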