roles/awstats/files/combineHttpLogs.sh: accept DATE_STR

This commit tweaks combineHttpLogs.sh so that you can provide an
optional DATE_STR on the CLI to have it run for a date other than the
default (which is currently 2 days ago, even though the comments say
somewhere between 3 and 5 days). Links in `latest/` are not updated
when running for user-provided dates.

There's some other refactoring involved but AFAICT this doesn't change
its default behavior at all.

This should let us re-run this script manually to combine logs for the
days that we missed during the colo move etc.
This commit is contained in:
Will Woods 2020-06-26 16:08:50 -04:00
parent cd81f49e72
commit 5c9d080009

View file

@ -19,26 +19,58 @@
# along with Fedora Project Infrastructure Ansible Repository. If
# not, see <http://www.gnu.org/licenses/>.
# NOTE(review): this listing looks like a rendered diff with the +/- markers
# stripped, so both the NUMDAYS-based settings and the LATEST_LOG_DATE
# setting appear in this run of lines -- confirm against the real file.
# Because sync-http may not get all logs for 3 days, we only merge
# things after 4 days.
# 2019-10-01 Dropped this down to 3 days..
# NOTE(review): the value below is 2, which does not match the "3 days"
# stated in the comment above.
NUMDAYS=2
# Year, month and day-of-month (zero-padded by date's %m/%d) of the day
# NUMDAYS days before now.
YEAR=$(/bin/date -d "-${NUMDAYS} days" +%Y)
MONTH=$(/bin/date -d "-${NUMDAYS} days" +%m)
DAY=$(/bin/date -d "-${NUMDAYS} days" +%d)
# Some constants / standard paths
# LOGDIR: root of the per-host log tree read from.
# NFSDIR: root of the combined-log tree written to.
LOGDIR=/var/log/hosts
NFSDIR=/mnt/fedora_stats/combined-http
# Per-host-group source directories for the selected day. The proxy*/dl*/
# people* wildcards are stored literally (assignments are not glob-expanded).
# These four variables are assigned again further down from ${YMD}.
PROXYLOG=${LOGDIR}/proxy*/${YEAR}/${MONTH}/${DAY}/http/
DL_LOG=${LOGDIR}/dl*/${YEAR}/${MONTH}/${DAY}/http/
PEOPLE=${LOGDIR}/people*/${YEAR}/${MONTH}/${DAY}/http/
# Destination directory for the selected day's combined logs.
TARGET=${NFSDIR}/${YEAR}/${MONTH}/${DAY}
# Path to the awstats log-merging tool.
LOGMERGE=/usr/share/awstats/tools/logresolvemerge.pl
# Because sync-http may not get all logs immediately, we look back
# a couple days to find the "latest" logs to merge.
# The value is a date(1) -d style string; it is also the default date when
# no DATE_STR is given on the command line.
LATEST_LOG_DATE="2 days ago"
# Function to turn a DATE_STR into YYYY/MM/DD, printed on stdout.
# All arguments are joined into one string (via "$*") and handed to
# date(1) -d, so the words of a date may be passed as separate arguments.
# The exit status is that of date(1).
ymd() {
  local date_str="$*"
  date -d "${date_str}" '+%Y/%m/%d'
}
# Get YYYY/MM/DD for LATEST_LOG_DATE, for later use (usage text and the
# "too new" safety check). Quoted so the date string reaches ymd as a single
# argument instead of being word-split and glob-expanded by the shell.
LATEST_YMD=$(ymd "$LATEST_LOG_DATE")
# Write the help text to stdout. It doubles as documentation of the CLI for
# anyone reading the source.
# Reads: $0, LOGDIR, NFSDIR, LATEST_LOG_DATE, LATEST_YMD.
usage() {
  printf '%s\n' \
    "usage: $0 [DATE_STR]" \
    "combine daily logs from $LOGDIR to $NFSDIR." \
    "Default date is '$LATEST_LOG_DATE' (currently $LATEST_YMD)." \
    "DATE_STR may be any date older than that, in any format understood by date(1):" \
    '"June 9"' \
    '"2020-06-23 -2weeks"'
}
# Check CLI args to set LOG_DATE.
#   no args     -> use LATEST_LOG_DATE and flag the latest/ links for update
#   -h, --help  -> print usage and exit 0
#   one arg     -> use it as DATE_STR; UPDATE_LATEST stays empty
#   otherwise   -> print usage on stderr and exit 2
# UPDATE_LATEST is cleared first so a value inherited from the environment
# cannot turn on the latest/ update when a date was given by the user.
UPDATE_LATEST=
case $# in
  0)
    LOG_DATE="$LATEST_LOG_DATE"
    UPDATE_LATEST=1
    ;;
  1)
    if [[ "$1" == "-h" || "$1" == "--help" ]]; then
      usage
      exit 0
    fi
    LOG_DATE="$1"
    ;;
  *)
    usage >&2
    exit 2
    ;;
esac
# Parse LOG_DATE into YYYY/MM/DD; exit 2 if ymd (date(1)) rejects it.
# LOG_DATE may come straight from the command line, so it is quoted to keep
# the shell from word-splitting it or expanding glob characters in it.
YMD=$(ymd "$LOG_DATE") || exit 2
# Safety check for dates that are too new for us to handle.
# (Also catches weird "dates" that date(1) allows, like '' or '0' or 'wet')
# Both values are zero-padded YYYY/MM/DD strings, so a string comparison
# orders them chronologically.
if [[ "$YMD" > "$LATEST_YMD" ]]; then
  printf '%s\n' "$0: error: DATE_STR '$LOG_DATE' ($YMD) newer than LATEST_LOG_DATE ($LATEST_YMD)" >&2
  exit 3
fi
# Okay we're good. Set paths, make directories, and do some merging!
# Source directories for the chosen day, one pattern per host group. The
# proxy*/dl*/people* wildcards are stored literally here (assignments are not
# glob-expanded); presumably they are expanded where these variables are
# used later in the script -- that code is not visible here.
PROXYLOG=${LOGDIR}/proxy*/${YMD}/http/
DL_LOG=${LOGDIR}/dl*/${YMD}/http/
PEOPLE=${LOGDIR}/people*/${YMD}/http/
# Destination directory for the chosen day's combined logs.
TARGET=${NFSDIR}/${YMD}
# Quoted so the whole path is created as one directory even if it ever
# contains whitespace or glob characters.
mkdir -p "${TARGET}"
##
@ -76,10 +108,10 @@ done
# 3. remove the old links
# 4. link up all the files we merged over
if [[ -d ${NFSDIR}/latest ]]; then
if [[ "$UPDATE_LATEST" && -d ${NFSDIR}/latest ]]; then
pushd ${NFSDIR}/latest &> /dev/null
/bin/rm -f *
for file in ../${YEAR}/${MONTH}/${DAY}/*; do
for file in ../${YMD}/*; do
ln -s ${file} .
done
popd &> /dev/null