From 87469a0fe7062813c87071e82039482d6fa30f9a Mon Sep 17 00:00:00 2001 From: Fabio Costa Date: Sat, 28 Jun 2014 19:51:14 +0100 Subject: [PATCH] Create results_dejavu.py --- scripts/results_dejavu.py | 355 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100644 scripts/results_dejavu.py diff --git a/scripts/results_dejavu.py b/scripts/results_dejavu.py new file mode 100644 index 0000000..fa4451f --- /dev/null +++ b/scripts/results_dejavu.py @@ -0,0 +1,355 @@ +# result generator for dejavu + +# TODO: Don't work very well with musics with special chars. +# use test file on the format below, with no special chars and only one "-" to separate artist from song + +import os, subprocess, json, re, sys +import logging, time +from os import listdir +from os.path import isfile, join +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.animation as animation +from optparse import OptionParser + +##### +### Test files are in specific format: +### 'artist_name'-'song_name'_'start_time'_'duration'sec.wav +##### + +DEFAULT_FS = 44100 +DEFAULT_WINDOW_SIZE = 4096 +DEFAULT_OVERLAP_RATIO = 0.5 + +#FIELD_SONG_ID = 'song_id' +FIELD_SONG_NAME = 'song_name' +FIELD_CONFIDENCE = 'confidence' +FIELD_QUERY_TIME = 'match_time' +FIELD_OFFSET = 'offset' + +# Parse options + +usage = "usage: %prog [options] DEJAVU_PATH TEST_FOLDER" +parser = OptionParser(usage=usage, version="%prog 1.1") + +parser.add_option("--no-log", + action="store_false", + dest="log", + default=True, + help='Disables logging' + ) +parser.add_option("--log-file", + dest="log_file", + default="results-compare.log", + metavar="LOG_FILE", + help='Set the path and filename of the log file' + ) +parser.add_option("--test-seconds", + action="append", + dest="test_seconds", + default=[], + metavar="Xsec", + help='Appends seconds to test suit' + ) +parser.add_option("--results-folder", + action="store", + dest="results_folder", + metavar="FOLDER", + help='Sets the path where the results are saved' + ) + +(options, args) = parser.parse_args() + +if len(args) != 2: + parser.error("wrong number of arguments") + +if len(options.test_seconds) == 0: + options.test_seconds = ['1sec','2sec','3sec','4sec','5sec','6sec','7sec','8sec','9sec','10sec'] + +if options.log == True: + logging.basicConfig( filename=options.log_file, level=logging.DEBUG ) + +if options.results_folder != "" and options.results_folder[len(options.results_folder)-1] != '/': + options.results_folder += "/" + +def log_msg(msg): + if options.log == True: + logging.debug(msg) + +class DejavuTest (object): + def __init__(self, folder, seconds): + super(DejavuTest, self).__init__() + + self.test_folder = folder + self.test_seconds = seconds + self.test_songs = [] + self.test_files = [ f for f in listdir(self.test_folder) if isfile(join(self.test_folder,f)) + and re.findall("[0-9]*sec",f)[0] in self.test_seconds ] + self.n_columns = len(self.test_seconds) + self.n_lines = len(self.test_files) / self.n_columns + + # variable match results (yes, no, invalid) + self.result_match = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] + + # variable match precision (if matched in the corrected time) + self.result_matching_times = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] + + # variable mahing time (query time) + self.result_query_duration = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] + + # variable confidence + self.result_match_confidence = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] + + self.begin() + + def get_column_id ( self,secs ): + for i, sec in enumerate(self.test_seconds): + if secs == sec: + return i + + def get_line_id ( self,artist, song ): + elem = artist + " - " + song + + for i, s in enumerate(self.test_songs): + if elem == s: + return i + + self.test_songs.append(elem) + return len(self.test_songs)-1 + + def begin(self): + for f in self.test_files: + log_msg('--------------------------------------------------') + log_msg('file: %s' % f) + # get column + col = self.get_column_id(re.findall("[0-9]*sec",f)[0]) + + # get artist and song + artist = re.findall("^[^\-]+",f) + artist = artist[0].rstrip() + + song = re.findall("\-[^\_]+",f) + song = song[0].lstrip("- ") + + line = self.get_line_id ( artist, song) + + result = subprocess.check_output([args[0], 'recognize', 'file', self.test_folder+"/"+f]) + log_msg('RESULT: %s' % result.strip() ) + + if result.strip() == "None": + log_msg('No match') + self.result_match[line][col] = 'no' + self.result_matching_times[line][col] = 0 + self.result_query_duration[line][col] = 0 + self.result_match_confidence[line][col] = 0 + + else: + result = result.strip() + result = result.replace(" \'", ' "') + result = result.replace("{\'", '{"') + result = result.replace("\':", '":') + result = result.replace("\',", '",') + + result = json.loads(result) + + artist_result = re.findall("^[^\-]+",result[FIELD_SONG_NAME]) + artist_result = artist_result[0].rstrip() + + song_result = re.findall("\-[^\_]+",result[FIELD_SONG_NAME]) + song_result = song_result[0].lstrip("- ") + + log_msg('artist: %s' % artist) + log_msg('artist_result: %s' % artist_result) + log_msg('song: %s' % song) + log_msg('song_result: %s' % song_result) + + if artist_result != artist or song_result != song: + log_msg('invalid match') + self.result_match[line][col] = 'invalid' + self.result_matching_times[line][col] = 0 + self.result_query_duration[line][col] = 0 + self.result_match_confidence[line][col] = 0 + else: + log_msg('correct match') + self.result_match[line][col] = 'yes' + self.result_query_duration[line][col] = round(result[FIELD_QUERY_TIME],3) + self.result_match_confidence[line][col] = result[FIELD_CONFIDENCE] + + song_start_time = re.findall("\_[^\_]+",f) + song_start_time = song_start_time[0].lstrip("_ ") + + #result_start_time = round((result[FIELD_SONG_DURATION] * result[FIELD_OFFSET]) / float(result[FIELD_SONG_SPEC_DURATION]), 0) + result_start_time = round((result[FIELD_OFFSET] * DEFAULT_WINDOW_SIZE * DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS),0) + + self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time) + if (abs(self.result_matching_times[line][col]) == 1): + self.result_matching_times[line][col] = 0 + + log_msg('query duration: %s' % round(result[FIELD_QUERY_TIME],3)) + log_msg('confidence: %s' % result[FIELD_CONFIDENCE]) + log_msg('song start_time: %s' % song_start_time) + log_msg('result start time: %s' % result_start_time) + if (self.result_matching_times[line][col] == 0): + log_msg('accurate match') + else: + log_msg('inaccurate match') + log_msg('--------------------------------------------------\n') + +print "obtaining results from dejavu" +log_msg('obtaining results from dejavu') +tm = time.time() +djv = DejavuTest(args[1], options.test_seconds) +print "finished obtaining results from dejavu in %s" % (time.time() - tm) +log_msg("finished obtaining results from dejavu in %s" % (time.time() - tm)) + +tests_n_lines = djv.n_lines +tests_n_columns = djv.n_columns # len(options.test_seconds) +tests = 1 # djv +n_secs = len(options.test_seconds) # = tests.n_columns + +# set result variables -> 4d variables +all_match_counter = [[[0 for x in xrange(tests)] for x in xrange(3)] for x in xrange(n_secs)] +all_matching_times_counter = [[[0 for x in xrange(tests)] for x in xrange(2)] for x in xrange(n_secs)] +all_query_duration = [[[0 for x in xrange(tests)] for x in xrange(tests_n_lines)] for x in xrange(n_secs)] +all_match_confidence = [[[0 for x in xrange(tests)] for x in xrange(tests_n_lines)] for x in xrange(n_secs)] + +# agroup results by seconds +for line in range(0, tests_n_lines): + for col in range(0, tests_n_columns): + # for dejavu + all_query_duration[col][line][0] = djv.result_query_duration[line][col] + all_match_confidence[col][line][0] = djv.result_match_confidence[line][col] + + djv_match_result = djv.result_match[line][col] + + if djv_match_result == 'yes': + all_match_counter[col][0][0] += 1 + elif djv_match_result == 'no': + all_match_counter[col][1][0] += 1 + else: + all_match_counter[col][2][0] += 1 + + djv_match_acc = djv.result_matching_times[line][col] + + if djv_match_acc == 0 and djv_match_result == 'yes': + all_matching_times_counter[col][0][0] += 1 + elif djv_match_acc != 0: + all_matching_times_counter[col][1][0] += 1 + +def autolabel(rects,ax): + # attach some text labels + for rect in rects: + height = rect.get_height() + ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%d'%int(height), + ha='center', va='bottom') + +def autolabeldoubles(rects,ax): + # attach some text labels + for rect in rects: + height = rect.get_height() + ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%s'%round(float(height),3), + ha='center', va='bottom') + +def create_plots(name,results): + for sec in range(0,n_secs): + ind = np.arange(tests_n_lines) # + width = 0.25 # the width of the bars + + fig = plt.figure() + ax = fig.add_subplot(111) + ax.set_xlim([-1*width, 2*width]) + + means_dvj = [x[0] for x in results[sec]] + rects1 = ax.bar(ind, means_dvj, width, color='r') + + # add some + ax.set_ylabel(name) + ax.set_title("%s %s Results" % (options.test_seconds[sec],name)) + ax.set_xticks(ind+width) + + labels = [0 for x in range(0,tests_n_lines)] + for x in range(0,tests_n_lines): + labels[x] = "song %s" % (x+1) + ax.set_xticklabels( labels ) + + box = ax.get_position() + ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) + + #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) + + if name == 'Confidence': + autolabel(rects1,ax) + else: + autolabeldoubles(rects1,ax) + + plt.grid() + + fig_name = "%s%s_%s.png" % (options.results_folder,name,options.test_seconds[sec]) + fig.savefig(fig_name) + +create_plots('Confidence',all_match_confidence) +create_plots('Query duration',all_query_duration) + +for sec in range(0,n_secs): + ind = np.arange(3) # + width = 0.25 # the width of the bars + + fig = plt.figure() + ax = fig.add_subplot(111) + ax.set_xlim([-1*width, 2.75]) + + means_dvj = [round(x[0]*100/tests_n_lines,1) for x in all_match_counter[sec]] + rects1 = ax.bar(ind, means_dvj, width, color='r') + + # add some + ax.set_ylabel('Matching Percentage') + ax.set_title('%s Matching Percentage' % options.test_seconds[sec]) + ax.set_xticks(ind+width) + + labels = ['yes','no','invalid'] + ax.set_xticklabels( labels ) + + box = ax.get_position() + ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) + + #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) + autolabeldoubles(rects1,ax) + + plt.grid() + + fig_name = "%smatching_perc_%s.png" % (options.results_folder,options.test_seconds[sec]) + fig.savefig(fig_name) + +for sec in range(0,n_secs): + ind = np.arange(2) # + width = 0.25 # the width of the bars + + fig = plt.figure() + ax = fig.add_subplot(111) + ax.set_xlim([-1*width, 1.75]) + + div = all_match_counter[sec][0][0] + if div == 0 : + div = 1000000 + + means_dvj = [round(x[0]*100/div,1) for x in all_matching_times_counter[sec]] + rects1 = ax.bar(ind, means_dvj, width, color='r') + + # add some + ax.set_ylabel('Matching Accuracy') + ax.set_title('%s Matching Times Accuracy' % options.test_seconds[sec]) + ax.set_xticks(ind+width) + + labels = ['yes','no'] + ax.set_xticklabels( labels ) + + box = ax.get_position() + ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) + + #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) + autolabeldoubles(rects1,ax) + + plt.grid() + + fig_name = "%smatching_acc_%s.png" % (options.results_folder,options.test_seconds[sec]) + fig.savefig(fig_name)