mirror of
https://github.com/correl/melpa.git
synced 2024-11-14 19:19:32 +00:00
Add process_log.py script to maintain download counts
This commit is contained in:
parent
0c3633c23b
commit
722d460914
1 changed files with 117 additions and 0 deletions
117
scripts/process_log.py
Normal file
117
scripts/process_log.py
Normal file
|
@ -0,0 +1,117 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
import gzip
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
LOGFILE = "/var/log/nginx/melpa/melpa.access.log"
|
||||
LOGREGEX = r'(?P<ip>[\d.]+) [ -]+ \[(?P<date>[\w/: -]+)\] \
|
||||
"GET /packages/(?P<package>[^ ]+)-[0-9.]+.(?:el|tar) HTTP/\d.\d" 200'
|
||||
|
||||
|
||||
def json_handler(obj):
    """Serialize values the stdlib json encoder cannot handle natively.

    datetimes become ISO-8601 strings and sets become lists; any other
    type raises TypeError, mirroring json.dump's default behaviour.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    elif isinstance(obj, set):
        return list(obj)
    # Bug fix: the original used {0} for both placeholders, so the message
    # printed the type twice and never showed the offending value.
    raise TypeError(
        'Object of type {0} with value {1} is not JSON serializable'.format(
            type(obj), repr(obj)))
|
||||
|
||||
|
||||
def json_dump(data, jsonfile, indent=None):
    """Write *data* to the open file object *jsonfile* as JSON.

    Delegates to json.dump with json_handler as the fallback encoder so
    datetime and set values serialize cleanly.
    """
    return json.dump(
        data,
        jsonfile,
        default=json_handler,
        indent=indent,
    )
|
||||
|
||||
|
||||
def datetime_parser(dct):
    """json object hook that converts list values back into sets, in place.

    Despite the name, no datetime handling happens here; it only reverses
    json_handler's set -> list conversion when the data file is reloaded.
    """
    list_keys = [k for k, v in dct.items() if isinstance(v, list)]
    for k in list_keys:
        dct[k] = set(dct[k])
    return dct
|
||||
|
||||
|
||||
def json_load(jsonfile):
    """Read JSON from the open file object *jsonfile*.

    Restores list values to sets via the datetime_parser object hook,
    undoing the conversion json_dump applied on the way out.
    """
    return json.load(
        jsonfile,
        object_hook=datetime_parser,
    )
|
||||
|
||||
|
||||
def parse_val(val):
    """Parse *val* as a "%Y-%m-%dT%H:%M:%S" datetime when possible.

    Values that do not look like an ISO timestamp are returned unchanged.
    """
    result = val
    try:
        result = datetime.strptime(val, "%Y-%m-%dT%H:%M:%S")
    except ValueError:
        pass
    return result
|
||||
|
||||
|
||||
def parse_logfile(logfilename, pkg_ip_time):
    """Parse one nginx access log and accumulate download events.

    Each line matching LOGREGEX is recorded into ``pkg_ip_time`` as
    ``{package: {ip: set(unix_timestamps)}}`` (mutated in place).
    Gzip-compressed logs (name ending in "gz") are handled transparently.

    Returns the number of matched lines.
    """
    opener = gzip.open if logfilename.endswith("gz") else open

    logre = re.compile(LOGREGEX)
    count = 0

    # Fix: the original never closed the log file handle; 'with'
    # guarantees it is released even if a parse error is raised.
    with opener(logfilename, 'r') as logfile:
        for line in logfile:
            match = logre.match(line)
            if match is None:
                continue

            ip = match.group('ip')
            # The timezone offset (second token) is dropped.
            # NOTE(review): time.mktime interprets the timestamp as the
            # server's local time -- confirm logs are written in that zone.
            dtstamp = int(time.mktime(
                datetime.strptime(match.group('date').split()[0],
                                  "%d/%b/%Y:%H:%M:%S").timetuple()))
            pkg = match.group('package')

            pkg_ip_time.setdefault(pkg, {}).setdefault(ip, set()).add(dtstamp)
            count += 1

    return count
|
||||
|
||||
|
||||
def main():
    """Parse the given nginx logs and update the download-count files.

    Maintains two files in the working directory:
      * melpa_log_data.json.gz      -- cumulative {pkg: {ip: [timestamps]}}
      * melpa_download_counts.json  -- {pkg: number of distinct client IPs}
    """
    parser = argparse.ArgumentParser(description='MELPA Log File Parser')
    # NOTE: 'unicode' (and 'iteritems' below) keep this a Python 2 script,
    # matching the rest of the file.
    parser.add_argument('logs', metavar="logs", type=unicode, nargs="*",
                        help="Log files to parse.", default=[LOGFILE])
    args = parser.parse_args()

    # Load the previously accumulated data, if any.
    if os.path.exists("melpa_log_data.json.gz"):
        # Fix: close the gzip stream instead of leaking the handle.
        with gzip.open("melpa_log_data.json.gz") as oldfile:
            pkg_ip_time = json_load(oldfile)
    else:
        pkg_ip_time = {}

    # Parse each log file named on the command line.
    for logfile in args.logs:
        sys.stdout.write("processing logfile {0}... ".format(logfile))
        sys.stdout.flush()

        count = parse_logfile(logfile, pkg_ip_time)
        sys.stdout.write("{0}\n".format(count))

    # Dump the updated cumulative data.  Fix: the original never closed
    # this handle, so the gzip trailer was not guaranteed to be flushed,
    # risking a truncated data file.
    with gzip.open("melpa_log_data.json.gz", 'w') as datafile:
        json_dump(pkg_ip_time, datafile)

    # A package's download count is the number of distinct client IPs
    # that ever fetched it.
    pkgcount = {p: len(i) for p, i in pkg_ip_time.iteritems()}

    with open("melpa_download_counts.json", 'w') as countfile:
        json_dump(pkgcount, countfile, indent=1)


if __name__ == '__main__':
    main()
|
Loading…
Reference in a new issue