ansible/roles/web-data-analysis/files/combineHttpLogs.sh
Nils Philippsen aac912e4c9 Set nullglob for combining HTTP log files
This can cause files named '*' in places where you wouldn’t expect it
otherwise.

Signed-off-by: Nils Philippsen <nils@redhat.com>
2023-11-09 20:42:31 +00:00

132 lines
4.5 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# This file is part of Fedora Project Infrastructure Ansible
# Repository.
#
# Fedora Project Infrastructure Ansible Repository is free software:
# you can redistribute it and/or modify it under the terms of the GNU
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later
# version.
#
# Fedora Project Infrastructure Ansible Repository is distributed in
# the hope that it will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License
# along with Fedora Project Infrastructure Ansible Repository. If
# not, see <http://www.gnu.org/licenses/>.
# Dont create files named '*'.
shopt -s nullglob
export MSGTOPIC_PREFIX=logging.stats
export MSGBODY_PRESET="loghost=$(hostname) run_id=$(uuidgen -r)"
simple_message_to_bus combinehttplogs.start
# Some constants / standard paths
LOGDIR=/var/log/hosts
NFSDIR=/mnt/fedora_stats/combined-http
LOGMERGE=/usr/share/awstats/tools/logresolvemerge.pl
# Because sync-http may not get all logs immediately, we look back
# a couple days to find the "latest" logs to merge.
LATEST_LOG_DATE="2 days ago"
# Funtion to parse a DATE_STR and print YYYY/MM/DD
ymd() { date -d "$*" +%Y/%m/%d; }
# Get YYYY/MM/DD for LATEST_LOG_DATE, for later use
LATEST_YMD=$(ymd $LATEST_LOG_DATE)
# Prints usage. Also serves as docs for anyone reading the source (hi there!)
usage() {
cat <<__USAGE__
usage: $0 [DATE_STR]
combine daily logs from $LOGDIR to $NFSDIR.
Default date is '$LATEST_LOG_DATE' (currently $LATEST_YMD).
DATE_STR may be any date older than that, in any format understood by date(1):
"June 9"
"2020-06-23 -2weeks"
__USAGE__
}
# Check CLI args to set LOG_DATE
case $# in
0) LOG_DATE="$LATEST_LOG_DATE"; UPDATE_LATEST=1 ;;
1) [ "$1" == "-h" -o "$1" == "--help" ] && usage && exit 0
LOG_DATE="$1" ;;
*) usage; exit 2 ;;
esac
# Parse LOG_DATE
YMD=$(ymd $LOG_DATE) || exit 2
# Safety check for dates that are too new for us to handle.
# (Also catches weird "dates" that date(1) allows, like '' or '0' or 'wet')
if [[ "$YMD" > "$LATEST_YMD" ]]; then
echo "$0: error: DATE_STR '$LOG_DATE' ($YMD) newer than LATEST_LOG_DATE ($LATEST_YMD)" >&2
exit 3
fi
# Okay we're good. Set paths, make directories, and do some merging!
PROXYLOG=${LOGDIR}/proxy*/${YMD}/http/
DL_LOG=${LOGDIR}/dl*/${YMD}/http/
PEOPLE=${LOGDIR}/people*/${YMD}/http/
TARGET=${NFSDIR}/${YMD}
mkdir -p ${TARGET}
##
## Merge the Proxies
FILES=$( ls -1 ${PROXYLOG}/*access.log.xz | awk '{x=split($0,a,"/"); print a[x]}' | sort -u )
for FILE in ${FILES}; do
TEMP=$(echo ${FILE} | sed 's/\.xz$//')
simple_message_to_bus combinehttplogs.logmerge.proxy.start log="$PROXYLOG" file="$FILE" target="$TARGET" temp="$TEMP"
perl ${LOGMERGE} ${PROXYLOG}/${FILE} > ${TARGET}/${TEMP}
simple_message_to_bus combinehttplogs.logmerge.proxy.finish log="$PROXYLOG" file="$FILE" target="$TARGET" temp="$TEMP" result="$?"
done
##
## Merge the Downloads
FILES=$( ls -1 ${DL_LOG}/dl*access.log.xz | awk '{x=split($0,a,"/"); print a[x]}' | sort -u )
for FILE in ${FILES}; do
TEMP=$(echo ${FILE} | sed 's/\.xz$//')
simple_message_to_bus combinehttplogs.logmerge.download.start proxylog="$DL_LOG" file="$FILE" target="$TARGET" temp="$TEMP"
perl ${LOGMERGE} ${DL_LOG}/${FILE} > ${TARGET}/${TEMP}
simple_message_to_bus combinehttplogs.logmerge.download.finish proxylog="$DL_LOG" file="$FILE" target="$TARGET" temp="$TEMP" result="$?"
done
##
## Merge the People
##
## Merge the Downloads
FILES=$( ls -1 ${PEOPLE}/fedora*access.log.xz | awk '{x=split($0,a,"/"); print a[x]}' | sort -u )
for FILE in ${FILES}; do
TEMP=$(echo ${FILE} | sed 's/\.xz$//')
simple_message_to_bus combinehttplogs.logmerge.people.start proxylog="$PEOPLE" file="$FILE" target="$TARGET" temp="$TEMP"
perl ${LOGMERGE} ${PEOPLE}/${FILE} > ${TARGET}/${TEMP}
simple_message_to_bus combinehttplogs.logmerge.people.finish proxylog="$PEOPLE" file="$FILE" target="$TARGET" temp="$TEMP" result="$?"
done
# Now we link up the files into latest directory
# 1. make sure the latest directory exists
# 2. go into it.
# 3. remove the old links
# 4. link up all the files we merged over
if [[ "$UPDATE_LATEST" && -d ${NFSDIR}/latest ]]; then
pushd ${NFSDIR}/latest &> /dev/null
/bin/rm -f *
for file in ../${YMD}/*; do
ln -s ${file} .
done
popd &> /dev/null
fi
simple_message_to_bus combinehttplogs.finish