#!/usr/bin/python
# This file is part of Fedora Project Infrastructure Ansible
# Repository.
#
# Fedora Project Infrastructure Ansible Repository is free software:
# you can redistribute it and/or modify it under the terms of the GNU
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later
# version.
#
# Fedora Project Infrastructure Ansible Repository is distributed in
# the hope that it will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License
# along with Fedora Project Infrastructure Ansible Repository.  If
# not, see <http://www.gnu.org/licenses/>.
"""Parse mirrorlist/metalink requests out of Apache access logs.

Each log file given on the command line is scanned for ``/metalink`` and
``/mirrorlist`` requests.  For every such request the date, client
address, normalised release (repo) and normalised architecture are
extracted, de-duplicated per input file, sorted, and appended to the
output file as ``YYYY-MM-DD ip repo arch`` lines.
"""

import sys
import re
import optparse
import os
import string

# Apache "combined" log format, one named group per field.
log_line = [
    r"(?P<host>([\d\.]+|[0-9a-fA-F\:]+))\s",
    r"(?P<identity>\S*)\s",
    r"(?P<user>\S*)\s",
    r"\[(?P<time>.*?)\]\s",
    r'"(?P<request>.*?)"\s',
    r"(?P<status>\d+)\s",
    r"(?P<bytes>\S*)\s",
    r'"(?P<referrer>.*?)"\s',
    r'"(?P<user_agent>.*?)"\s*',
]

pattern = re.compile("".join(log_line))

# Fedora pre-release version strings: (base release, point suffixes).
# "<base>.<suffix>" identifies a test release of Fedora <base + 1>.
# The series are deliberately irregular (no 8.89, an extra 9.90.1), so
# they are listed explicitly rather than computed.
_PRERELEASES = (
    (6, ("89", "90", "91", "92", "93")),
    (7, ("89", "90", "91", "92", "93")),
    (8, ("90", "91", "92", "93")),
    (9, ("90", "90.1", "91", "92", "93")),
    (10, ("89", "90", "91", "92", "93")),
    (11, ("89", "90", "91", "92", "93")),
    (12, ("89", "90", "91", "92", "93")),
)


def _build_repo_dict():
    """Return the mapping of sanitised repo strings to canonical names.

    The generated table holds exactly the entries that used to be
    written out by hand, in the same order.
    """
    table = {}

    # EPEL 4-9: "epelN" plus point releases "epelN.0" .. "epelN.20".
    for major in range(4, 10):
        canonical = "epel%d" % major
        table[canonical] = canonical
        for minor in range(21):
            table["%s.%d" % (canonical, minor)] = canonical

    table["rawhide"] = "rawhide"
    table["frawhide"] = "rawhide"
    table["rawhidemodular"] = "rawhide_modular"

    # Bare Fedora release numbers "3" .. "33" -> "f03" .. "f33".
    for release in range(3, 34):
        table["%d" % release] = "f%02d" % release

    # Pre-releases, first bare ("9.90"), then "f"-prefixed ("f9.90").
    for prefix in ("", "f"):
        for base, suffixes in _PRERELEASES:
            for suffix in suffixes:
                key = "%s%d.%s" % (prefix, base, suffix)
                table[key] = "f%02d" % (base + 1)

    # "f3" .. "f9" (unpadded), then "f03" .. "f33" (already canonical).
    for release in range(3, 10):
        table["f%d" % release] = "f%02d" % release
    for release in range(3, 34):
        canonical = "f%02d" % release
        table[canonical] = canonical

    # Modular repos 27-33, spelled either "fmodularNN" or "modularfNN".
    for key_format in ("fmodular%d", "modularf%d"):
        for release in range(27, 34):
            table[key_format % release] = "modular_f%d" % release

    for major in range(4, 10):
        canonical = "rhel%d" % major
        table[canonical] = canonical

    return table


repo_dict = _build_repo_dict()
repo_keys = repo_dict.keys()

_APACHE_MONTHS = {
    'Jan': '01',
    'Feb': '02',
    'Mar': '03',
    'Apr': '04',
    'May': '05',
    'Jun': '06',
    'Jul': '07',
    'Aug': '08',
    'Sep': '09',
    'Oct': '10',
    'Nov': '11',
    'Dec': '12',
}

# Date reported when a log timestamp cannot be parsed.
_EPOCH_DATE = "1970-01-01"

# A repo value is cut at the first occurrence of any of these characters.
_REPO_CRAP_CHARS = (
    '/', '$', '!', '#', '%', '&', "'", '"', "(", ")", "*", "+", ",",
    "_", ":", ";", "<", ">", "=", "?", "@", "[", "^", "|",
)

# Words that all mean "Fedora"; each is collapsed to the "f" prefix.
_FEDORA_PHRASES = ("core", "fedora", "extras", "legacy", "fc")

# Repo flavour words that do not affect which release was asked for.
_REPO_NOISE_PHRASES = (
    ".newkey", "install", "alpha", "beta", "client", "debug", "devel",
    "info", "optional", "preview", "released", "source", "testing",
    "updates",
)

# Variant names; the name and everything after it is dropped.
_REPO_VARIANTS = ("client", "cloud", "server", "workstation")

_ARCH_DICT = {
    'i386': 'i386',
    'i486': 'i386',
    'i586': 'i386',
    'i686': 'i386',
    'athlon': 'i386',
    'pentium': 'i386',
    'pentium3': 'i386',
    'pentium4': 'i386',
    'pentium5': 'i386',
    'ia32': 'i386',
    'x86': 'i386',
    'x86_32': 'i386',
    'x86_64': 'x86_64',
    'amd64': 'x86_64',
    'aarch64': 'aarch64',
    'alpha': 'alpha',
    'arm': 'arm',
    'arm64': 'aarch64',
    'armhfp': 'arm',
    'armv3l': 'arm',
    'armv5tel': 'arm',
    'armv7hl': 'arm',
    'ia64': 'ia64',
    'mips': 'mips',
    'mips64': 'mips64',
    'mips64el': 'mips64',
    'powepc': 'ppc',
    'ppc': 'ppc',
    'ppc32': 'ppc',
    'ppc64': 'ppc64',
    'ppc64le': 'ppc64le',
    's390': 's390',
    's390x': 's390',
    'sparc': 'sparc',
    'sparc64': 'sparc64',
    'tilegx': 'tilegx',
}

# An arch value is cut at the first occurrence of any of these
# characters.  "_" is intentionally absent so "x86_64" survives.
_ARCH_DELIMITERS = (
    "/", "!", "#", "%", "&", "'", "(", "*", "+", ",", "-", ".", ":",
    ";", "<", "=", ">", "?", "@", "[", "]", "^", '"', "\\", "|", "$",
)


def breakoutdate(givendate):
    """Convert an Apache log timestamp to an ISO ``YYYY-MM-DD`` date.

    ``givendate`` is the text between the square brackets of a log line,
    e.g. ``10/Oct/2000:13:55:36 -0700``.  A timestamp that cannot be
    parsed (wrong number of fields, unknown month abbreviation) yields
    the epoch date ``1970-01-01`` instead of raising.
    """
    try:
        day, month, year = givendate.split(":")[0].split('/')
        return "%s-%s-%s" % (year, _APACHE_MONTHS[month], day)
    except (ValueError, KeyError, AttributeError):
        # Corrupted timestamp: fall back to the epoch.
        return _EPOCH_DATE


def breakoutrepo(request):
    """Extract the raw ``repo`` and ``arch`` values from a request line.

    ``request`` is the quoted request field of a log line, e.g.
    ``GET /mirrorlist?repo=fedora-28&arch=x86_64 HTTP/1.1``.

    Returns a ``(repo, arch)`` tuple.  A parameter missing from the
    query string comes back as ``""``; a request with no query string at
    all returns ``("unknown_repo", "unknown_arch")``.
    """
    try:
        parts = request.split()[1].split("?")[1].split("&")
    except (IndexError, AttributeError):
        return ("unknown_repo", "unknown_arch")

    repo = ""
    arch = ""
    for part in parts:
        # Substring tests, so the last matching parameter wins.
        if 'repo=' in part:
            repo = part.split('=')[1]
        if 'arch=' in part:
            arch = part.split('=')[1]
    return (repo, arch)


def figureoutrepo(asked_repo):
    """Normalise a requested repo string to a canonical release name.

    The value is lower-cased, cut at the first junk character, has the
    various Fedora spellings collapsed to ``f``, has flavour words and
    dashes removed, and is then looked up in ``repo_dict``.

    Returns the canonical name (e.g. ``f28``, ``epel7``) or
    ``"unknown_repo"`` when the result is not in the table.
    """
    spew = asked_repo.lower()

    # Keep only the text before the first junk character.
    for char in _REPO_CRAP_CHARS:
        spew = spew.split(char)[0]

    for word in _FEDORA_PHRASES:
        spew = spew.replace(word, "f")

    for word in _REPO_NOISE_PHRASES:
        spew = spew.replace(word, "")

    spew = spew.replace("centosplus", "centos")

    # Drop the variant name and everything after it.  "client" has
    # already been removed by the noise list above, so it never matches
    # here; it is kept to mirror the historical rule set.
    for variant in _REPO_VARIANTS:
        spew = re.sub(variant + ".*", "", spew)

    spew = spew.replace("-", "")

    return repo_dict.get(spew.strip(), "unknown_repo")


def figureoutarch(asked_arch):
    """Normalise a requested arch string to a canonical architecture.

    The value is cut at the first delimiter character, lower-cased and
    looked up in the architecture alias table.

    Returns the canonical architecture (e.g. ``x86_64``, ``i386``) or
    ``"unknown_arch"`` when the result is not in the table.
    """
    spew = asked_arch
    for delimiter in _ARCH_DELIMITERS:
        spew = spew.split(delimiter)[0]

    return _ARCH_DICT.get(spew.lower(), "unknown_arch")


def parseline(our_line):
    """Summarise one access-log line.

    Returns ``"<date> <ip> <repo> <arch>"`` for a ``/metalink`` or
    ``/mirrorlist`` request that matches the log pattern, and ``""``
    for every other line.
    """
    # Cheap substring test first so most lines skip the regex entirely.
    if '/metalink' not in our_line and '/mirrorlist' not in our_line:
        return ""

    our_blob = pattern.match(our_line)
    if not our_blob:
        return ""

    our_dict = our_blob.groupdict()
    ip = our_dict['host']
    time = breakoutdate(our_dict['time'])
    asked_repo, asked_arch = breakoutrepo(our_dict['request'])
    repo = figureoutrepo(asked_repo)
    arch = figureoutarch(asked_arch)
    return "%s %s %s %s" % (time, ip, repo, arch)


def parselog(our_file, out_file):
    """Parse one log file and append its unique results to ``out_file``.

    The distinct, sorted ``parseline`` results for ``our_file`` are
    appended to ``out_file``, one per line.  Progress and errors are
    reported on stderr; the process exits with status -1 if either file
    cannot be opened.
    """
    try:
        data = open(our_file, "r")
    except (IOError, OSError):
        sys.stderr.write("Unable to open %s\n" % our_file)
        sys.exit(-1)

    yumclients_set = set()
    with data:
        for line in data:
            parsed = parseline(line)
            if parsed:
                yumclients_set.add(parsed)

    try:
        output = open(out_file, "a")
    except (IOError, OSError):
        sys.stderr.write("Unable to open outputfile\n")
        sys.exit(-1)

    sys.stderr.write("Outputting data: %s\n" % our_file)
    with output:
        # Text-mode files translate "\n" themselves; writing os.linesep
        # would produce "\r\r\n" on platforms where it is "\r\n".
        output.writelines(entry + "\n" for entry in sorted(yumclients_set))


def main():
    """Command-line entry point: parse every log file named in argv."""
    parser = optparse.OptionParser(
        description="A program to parse Fedora mirrorlist apache common log format files.",
        prog="mirrorlist.py",
        version="1.0.2",
        usage="%prog [-o output-filename] logfile1 [logfile2...]"
    )
    parser.add_option("-o", "--output",
                      default="output.txt",
                      help="Sets the name of the output file for the run.",
                      dest="output")
    (options, args) = parser.parse_args()

    # An explicitly empty "-o ''" falls back to the default name.
    out_file = options.output or "output.txt"

    for our_file in args:
        parselog(our_file, out_file)


if __name__ == '__main__':
    main()