mirror of
https://github.com/correl/melpa.git
synced 2024-11-14 11:09:31 +00:00
use sqlite3 for processing log files
This commit is contained in:
parent
71ccdd7959
commit
8acf077daf
2 changed files with 14 additions and 15 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -17,3 +17,4 @@
|
||||||
/.ecukes-failing-scenarios
|
/.ecukes-failing-scenarios
|
||||||
/sandbox
|
/sandbox
|
||||||
*~
|
*~
|
||||||
|
/download_log.db
|
||||||
|
|
|
@ -10,6 +10,7 @@ import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import sqlite3
|
||||||
from operator import or_
|
from operator import or_
|
||||||
|
|
||||||
LOGFILE = "/home/melpa/log/melpa.access.log"
|
LOGFILE = "/home/melpa/log/melpa.access.log"
|
||||||
|
@ -57,7 +58,7 @@ def ip_to_number(ip):
|
||||||
return reduce(or_, ((int(n) << (i*8)) for i, n in enumerate(
|
return reduce(or_, ((int(n) << (i*8)) for i, n in enumerate(
|
||||||
reversed(ip.split('.')))), 0)
|
reversed(ip.split('.')))), 0)
|
||||||
|
|
||||||
def parse_logfile(logfilename, pkg_ip_time):
|
def parse_logfile(logfilename, curs):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
if logfilename.endswith("gz"):
|
if logfilename.endswith("gz"):
|
||||||
|
@ -66,7 +67,6 @@ def parse_logfile(logfilename, pkg_ip_time):
|
||||||
logfile = open(logfilename, 'r')
|
logfile = open(logfilename, 'r')
|
||||||
|
|
||||||
logre = re.compile(LOGREGEX)
|
logre = re.compile(LOGREGEX)
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
for line in logfile:
|
for line in logfile:
|
||||||
|
@ -82,8 +82,7 @@ def parse_logfile(logfilename, pkg_ip_time):
|
||||||
"%d/%b/%Y:%H:%M:%S").timetuple()))
|
"%d/%b/%Y:%H:%M:%S").timetuple()))
|
||||||
pkg = match.group('package')
|
pkg = match.group('package')
|
||||||
|
|
||||||
pkg_ip_time.setdefault(pkg, set()).add(ip)
|
curs.execute("INSERT OR IGNORE INTO pkg_ip VALUES (?, ?)", (pkg, ip))
|
||||||
|
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
return count
|
return count
|
||||||
|
@ -113,26 +112,25 @@ def main():
|
||||||
|
|
||||||
file(pidfile, 'w').write(pid)
|
file(pidfile, 'w').write(pid)
|
||||||
|
|
||||||
# load old data file
|
new_db = not os.path.exists("download_log.db")
|
||||||
if os.path.exists("download_log.json.gz"):
|
conn = sqlite3.connect("download_log.db")
|
||||||
pkg_ip_time = json_load(gzip.open("download_log.json.gz"))
|
curs = conn.cursor()
|
||||||
else:
|
if new_db:
|
||||||
pkg_ip_time = {}
|
sys.stdout.write("creating database...\n")
|
||||||
|
curs.execute('''CREATE TABLE pkg_ip (package, ip, PRIMARY KEY (package, ip))''')
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
# parse each parameter
|
# parse each parameter
|
||||||
for logfile in args.logs:
|
for logfile in args.logs:
|
||||||
sys.stdout.write("processing logfile {0}... ".format(logfile))
|
sys.stdout.write("processing logfile {0}... ".format(logfile))
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
count = parse_logfile(logfile, pkg_ip_time)
|
count = parse_logfile(logfile, curs)
|
||||||
sys.stdout.write("{0}\n".format(count))
|
sys.stdout.write("{0}\n".format(count))
|
||||||
|
conn.commit()
|
||||||
# dump new data file
|
|
||||||
json_dump(pkg_ip_time, gzip.open("download_log.json.gz", 'w'))
|
|
||||||
|
|
||||||
# calculate current package totals
|
# calculate current package totals
|
||||||
pkgcount = {p: len(i) for p, i in pkg_ip_time.iteritems()}
|
pkgcount = {p: c for p,c in curs.execute("SELECT package, count(ip) FROM pkg_ip GROUP BY 1")}
|
||||||
|
|
||||||
json_dump(pkgcount, open("html/download_counts.json", 'w'), indent=1)
|
json_dump(pkgcount, open("html/download_counts.json", 'w'), indent=1)
|
||||||
|
|
||||||
os.unlink(pidfile)
|
os.unlink(pidfile)
|
||||||
|
|
Loading…
Reference in a new issue