dejavu/test.py

346 lines
11 KiB
Python
Raw Normal View History

2014-06-28 18:51:14 +00:00
# Result generator for dejavu.
# TODO: Does not work very well with songs whose names contain special characters.
# Use test files in the format below, with no special chars and only one "-" to separate artist from song.
import os, subprocess, json, re, sys
import logging, time
from os import listdir
from os.path import isfile, join
import numpy as np
import matplotlib.pyplot as plt
from optparse import OptionParser
from dejavu.decoder import path_to_songname
import ast
2014-06-28 18:51:14 +00:00
#####
### Test files are in specific format:
### 'artist_name'-'song_name'_'start_time'_'duration'sec.wav
#####
# Keys looked up in the result dictionary printed by dejavu's recognizer.
FIELD_SONG_NAME = 'song_name'
FIELD_OFFSET = 'offset'
FIELD_CONFIDENCE = 'confidence'
FIELD_QUERY_TIME = 'match_time'

# Audio parameters used to convert a reported offset into a start time:
#   offset * DEFAULT_WINDOW_SIZE * DEFAULT_OVERLAP_RATIO / DEFAULT_FS
# NOTE(review): presumably these mirror the settings dejavu fingerprints
# with (sample rate in Hz, FFT window in samples) -- confirm.
DEFAULT_FS = 44100
DEFAULT_WINDOW_SIZE = 4096
DEFAULT_OVERLAP_RATIO = 0.5
# Command line interface: two positional arguments (the dejavu checkout and
# the folder holding the test clips) plus the optional switches below.
usage = "usage: %prog [options] DEJAVU_PATH TEST_FOLDER"
parser = OptionParser(usage=usage, version="%prog 1.1")

# Logging is on by default; --no-log flips options.log to False.
parser.add_option("--no-log",
                  action="store_false",
                  dest="log",
                  default=True,
                  help='Disables logging')

parser.add_option("--log-file",
                  action="store",
                  dest="log_file",
                  default="results-compare.log",
                  metavar="LOG_FILE",
                  help='Set the path and filename of the log file')

# May be given several times; each value is a duration token such as "5sec".
parser.add_option("--test-seconds",
                  action="append",
                  dest="test_seconds",
                  default=[],
                  metavar="Xsec",
                  help='Appends seconds to test suite')

# No default is declared, so options.results_folder is None when omitted.
parser.add_option("--results-folder",
                  action="store",
                  dest="results_folder",
                  metavar="FOLDER",
                  help='Sets the path where the results are saved')
2014-06-28 18:51:14 +00:00
# Parse the command line; exactly DEJAVU_PATH and TEST_FOLDER are required.
(options, args) = parser.parse_args()
if len(args) != 2:
    parser.error("wrong number of arguments")

# Without any --test-seconds, test every duration from 1 to 10 seconds.
if not options.test_seconds:
    options.test_seconds = ['1sec', '2sec', '3sec', '4sec', '5sec',
                            '6sec', '7sec', '8sec', '9sec', '10sec']

if options.log:
    logging.basicConfig(filename=options.log_file, level=logging.DEBUG)

# --results-folder is None when omitted; treat that as "current directory"
# (an empty prefix) instead of crashing on len(None).
if not options.results_folder:
    options.results_folder = ""
elif not options.results_folder.endswith('/'):
    # Figure names are built by plain string concatenation, so the folder
    # must end with a separator.
    options.results_folder += "/"

# ensure results folder exists
if options.results_folder and not os.path.isdir(options.results_folder):
    os.makedirs(options.results_folder)
2014-06-28 18:51:14 +00:00
def log_msg(msg):
    """Write *msg* to the log at DEBUG level unless logging is disabled.

    Logging is controlled by the module-level ``options.log`` flag, which the
    ``--no-log`` command line switch sets to False.
    """
    if options.log:
        logging.debug(msg)
class DejavuTest(object):
    """Run dejavu's recognizer over a folder of test clips and tabulate results.

    Test files are expected to be named
    ``artist-song_<start_time>_<duration>sec.<ext>``. Every regular file in
    ``folder`` whose duration token (e.g. ``"5sec"``) is listed in ``seconds``
    is recognized once. The outcome is stored in four matrices indexed
    ``[line][column]``, where a line is a song and a column is a duration:

    * ``result_match``            -- ``'yes'``, ``'no'`` or ``'invalid'``
    * ``result_matching_times``   -- detected start minus expected start (s)
    * ``result_query_duration``   -- the reported match time, rounded to 3 places
    * ``result_match_confidence`` -- the reported confidence

    Cells that are never filled keep their initial value ``0``.
    Constructing the object immediately runs the whole test via ``begin()``.
    """

    def __init__(self, folder, seconds):
        """Collect the test files in *folder* and run the recognition pass.

        folder  -- directory containing the test clips
        seconds -- list of duration tokens to test, e.g. ``['1sec', '5sec']``
        """
        super(DejavuTest, self).__init__()

        self.test_folder = folder
        self.test_seconds = seconds
        self.test_songs = []

        # Files without any "<N>sec" token yield None here and are skipped
        # instead of aborting the run with an IndexError.
        self.test_files = [
            f for f in listdir(self.test_folder)
            if isfile(join(self.test_folder, f))
            and self._seconds_token(f) in self.test_seconds
        ]

        self.n_columns = len(self.test_seconds)
        # One line per song; explicit floor division keeps this an integer.
        self.n_lines = (len(self.test_files) // self.n_columns
                        if self.n_columns else 0)

        # variable match results (yes, no, invalid)
        self.result_match = self._empty_matrix()

        print("columns: %s" % self.n_columns)
        print("length of test files: %s" % len(self.test_files))
        print("lines: %s" % self.n_lines)
        print("result_match matrix: %s" % (self.result_match,))

        # variable match precision (whether it matched at the correct time)
        self.result_matching_times = self._empty_matrix()

        # variable matching time (query time)
        self.result_query_duration = self._empty_matrix()

        # variable confidence
        self.result_match_confidence = self._empty_matrix()

        self.begin()

    @staticmethod
    def _seconds_token(filename):
        """Return the first ``<digits>sec`` token in *filename*, or None."""
        tokens = re.findall("[0-9]*sec", filename)
        return tokens[0] if tokens else None

    def _empty_matrix(self):
        """Return a fresh ``n_lines`` x ``n_columns`` matrix filled with 0."""
        return [[0] * self.n_columns for _ in range(self.n_lines)]

    def _record_failure(self, line, col, outcome):
        """Store *outcome* for a cell and zero its numeric measurements."""
        self.result_match[line][col] = outcome
        self.result_matching_times[line][col] = 0
        self.result_query_duration[line][col] = 0
        self.result_match_confidence[line][col] = 0

    def get_column_id(self, secs):
        """Return the column index of duration token *secs*.

        Returns None when *secs* is not one of ``self.test_seconds``.
        """
        for i, sec in enumerate(self.test_seconds):
            if secs == sec:
                return i

    def get_line_id(self, song):
        """Return the line index of *song*, registering it on first sight."""
        for i, s in enumerate(self.test_songs):
            if song == s:
                return i
        self.test_songs.append(song)
        return len(self.test_songs) - 1

    def begin(self):
        """Recognize every test file and fill in the result matrices.

        Each file is passed to ``<DEJAVU_PATH>/dejavu.py recognize file`` in
        a subprocess. Its stripped output is either the text ``None`` (no
        match) or a dict literal that is parsed with ``ast.literal_eval``.
        """
        for f in self.test_files:
            log_msg('--------------------------------------------------')
            log_msg('file: %s' % f)

            # get column
            col = self.get_column_id(self._seconds_token(f))
            song = path_to_songname(f).split("_")[0]  # format: XXXX_offset_length.mp3
            line = self.get_line_id(song)

            # universal_newlines makes the output text (not bytes) on
            # Python 3 as well, so the string handling below keeps working.
            result = subprocess.check_output(
                ["python", args[0] + "/dejavu.py", 'recognize', 'file',
                 join(self.test_folder, f)],
                universal_newlines=True)

            log_msg('RESULT: %s' % result.strip())

            if result.strip() == "None":
                log_msg('No match')
                self._record_failure(line, col, 'no')
            else:
                # Rewrite the single-quoted delimiters of the printed dict
                # as double quotes before evaluating the literal.
                result = result.strip()
                result = result.replace(" \'", ' "')
                result = result.replace("{\'", '{"')
                result = result.replace("\':", '":')
                result = result.replace("\',", '",')

                # which song did we predict?
                result = ast.literal_eval(result)
                print("result %s" % (result,))
                song_result = result[FIELD_SONG_NAME]

                log_msg('song: %s' % song)
                log_msg('song_result: %s' % song_result)

                if song_result != song:
                    log_msg('invalid match')
                    self._record_failure(line, col, 'invalid')
                else:
                    log_msg('correct match')
                    print(self.result_match)

                    self.result_match[line][col] = 'yes'
                    self.result_query_duration[line][col] = round(result[FIELD_QUERY_TIME], 3)
                    self.result_match_confidence[line][col] = result[FIELD_CONFIDENCE]

                    # The expected start time is the first "_"-prefixed
                    # field of the file name.
                    song_start_time = re.findall(r"_[^_]+", f)
                    song_start_time = song_start_time[0].lstrip("_ ")

                    # Convert the reported offset to seconds.
                    result_start_time = round(
                        (result[FIELD_OFFSET] * DEFAULT_WINDOW_SIZE * DEFAULT_OVERLAP_RATIO)
                        / (DEFAULT_FS), 0)

                    self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time)
                    # A one second difference is tolerated as accurate.
                    if abs(self.result_matching_times[line][col]) == 1:
                        self.result_matching_times[line][col] = 0

                    log_msg('query duration: %s' % round(result[FIELD_QUERY_TIME], 3))
                    log_msg('confidence: %s' % result[FIELD_CONFIDENCE])
                    log_msg('song start_time: %s' % song_start_time)
                    log_msg('result start time: %s' % result_start_time)
                    if self.result_matching_times[line][col] == 0:
                        log_msg('accurate match')
                    else:
                        log_msg('inaccurate match')
            log_msg('--------------------------------------------------\n')
# Run the whole recognition pass (done by the DejavuTest constructor) and
# report how long it took.
print("obtaining results from dejavu")
log_msg('obtaining results from dejavu')
tm = time.time()
djv = DejavuTest(args[1], options.test_seconds)
print("finished obtaining results from dejavu in %s" % (time.time() - tm))
log_msg("finished obtaining results from dejavu in %s" % (time.time() - tm))

tests_n_lines = djv.n_lines
tests_n_columns = djv.n_columns  # len(options.test_seconds)
tests = 1  # djv
n_secs = len(options.test_seconds)  # = tests.n_columns

# Result containers, indexed [second][row][test]:
#   all_match_counter          rows are yes / no / invalid counts
#   all_matching_times_counter rows are accurate / inaccurate counts
#   all_query_duration         one row per song
#   all_match_confidence       one row per song
all_match_counter = [[[0] * tests for _ in range(3)] for _ in range(n_secs)]
all_matching_times_counter = [[[0] * tests for _ in range(2)] for _ in range(n_secs)]
all_query_duration = [[[0] * tests for _ in range(tests_n_lines)] for _ in range(n_secs)]
all_match_confidence = [[[0] * tests for _ in range(tests_n_lines)] for _ in range(n_secs)]

# Row of all_match_counter for each outcome; anything else counts as invalid.
_outcome_rows = {'yes': 0, 'no': 1}

# Group the per-song results by tested duration.
for song_row in range(tests_n_lines):
    for sec_col in range(tests_n_columns):
        # for dejavu
        all_query_duration[sec_col][song_row][0] = djv.result_query_duration[song_row][sec_col]
        all_match_confidence[sec_col][song_row][0] = djv.result_match_confidence[song_row][sec_col]

        outcome = djv.result_match[song_row][sec_col]
        all_match_counter[sec_col][_outcome_rows.get(outcome, 2)][0] += 1

        # A zero time difference only counts as accurate for a correct match.
        time_error = djv.result_matching_times[song_row][sec_col]
        if time_error != 0:
            all_matching_times_counter[sec_col][1][0] += 1
        elif outcome == 'yes':
            all_matching_times_counter[sec_col][0][0] += 1
def autolabel(rects, ax):
    """Label each bar in *rects* with its height truncated to an integer.

    The text is centred horizontally on the bar and placed at 1.05 times
    the bar's height on the axes *ax*.
    """
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2.
        y_text = 1.05 * bar_height
        caption = '%d' % int(bar_height)
        ax.text(x_center, y_text, caption, ha='center', va='bottom')
def autolabeldoubles(rects, ax):
    """Label each bar in *rects* with its height rounded to three decimals.

    The text is centred horizontally on the bar and placed at 1.05 times
    the bar's height on the axes *ax*.
    """
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2.
        y_text = 1.05 * bar_height
        caption = '%s' % round(float(bar_height), 3)
        ax.text(x_center, y_text, caption, ha='center', va='bottom')
def create_plots(name, results):
    """Save one per-song bar chart of *results* for every tested duration.

    name    -- quantity being plotted; used as y label, in the title and in
               the file name. ``'Confidence'`` gets integer bar labels,
               anything else gets labels rounded to three decimals.
    results -- nested list indexed ``[second][song][test]``; only test 0 is
               plotted.

    Each figure is written to
    ``<options.results_folder><name>_<seconds>.png`` and then closed, so
    figures do not pile up in memory across durations.
    """
    for sec in range(0, n_secs):
        ind = np.arange(tests_n_lines)
        width = 0.25  # the width of the bars

        fig = plt.figure()
        ax = fig.add_subplot(111)
        # NOTE(review): these limits only span the first bar although one
        # bar per song is drawn -- confirm this is intended.
        ax.set_xlim([-1 * width, 2 * width])

        means_dvj = [x[0] for x in results[sec]]
        rects1 = ax.bar(ind, means_dvj, width, color='r')

        # axis label, title and one tick label per song
        ax.set_ylabel(name)
        ax.set_title("%s %s Results" % (options.test_seconds[sec], name))
        ax.set_xticks(ind + width)
        ax.set_xticklabels(["song %s" % (x + 1) for x in range(tests_n_lines)])

        # Shrink the axes to 75% of their width.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])

        if name == 'Confidence':
            autolabel(rects1, ax)
        else:
            autolabeldoubles(rects1, ax)

        plt.grid()

        fig_name = "%s%s_%s.png" % (options.results_folder, name, options.test_seconds[sec])
        fig.savefig(fig_name)
        # Release the figure; pyplot otherwise keeps it alive until exit.
        plt.close(fig)
def _percentage(count, total):
    """Return *count* as a percentage of *total*, rounded to one decimal.

    Uses float division so the result is not truncated on Python 2, and
    returns 0.0 instead of dividing by zero when *total* is 0.
    """
    if total == 0:
        return 0.0
    return round(count * 100.0 / total, 1)


def _save_percentage_chart(values, labels, x_max, y_label, title, fig_name):
    """Draw *values* as one labelled bar each and save the chart to *fig_name*."""
    ind = np.arange(len(labels))
    width = 0.25  # the width of the bars

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlim([-1 * width, x_max])

    rects1 = ax.bar(ind, values, width, color='r')

    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.set_xticks(ind + width)
    ax.set_xticklabels(labels)

    # Shrink the axes to 75% of their width.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])

    autolabeldoubles(rects1, ax)
    plt.grid()

    fig.savefig(fig_name)
    # Release the figure; pyplot otherwise keeps it alive until exit.
    plt.close(fig)


# Per-song charts of confidence and query duration.
create_plots('Confidence', all_match_confidence)
create_plots('Query duration', all_query_duration)

# Share of test rows that matched correctly / not at all / the wrong song.
for sec in range(0, n_secs):
    _save_percentage_chart(
        [_percentage(x[0], tests_n_lines) for x in all_match_counter[sec]],
        ['yes', 'no', 'invalid'],
        2.75,
        'Matching Percentage',
        '%s Matching Percentage' % options.test_seconds[sec],
        "%smatching_perc_%s.png" % (options.results_folder, options.test_seconds[sec]))

# Share of the correct matches whose start time was accurate / inaccurate.
for sec in range(0, n_secs):
    correct_matches = all_match_counter[sec][0][0]
    _save_percentage_chart(
        [_percentage(x[0], correct_matches) for x in all_matching_times_counter[sec]],
        ['yes', 'no'],
        1.75,
        'Matching Accuracy',
        '%s Matching Times Accuracy' % options.test_seconds[sec],
        "%smatching_acc_%s.png" % (options.results_folder, options.test_seconds[sec]))