# result generator for dejavu # TODO: Don't work very well with musics with special chars. # use test file on the format below, with no special chars and only one "-" to separate artist from song import os, subprocess, json, re, sys import logging, time from os import listdir from os.path import isfile, join import numpy as np import matplotlib.pyplot as plt import matplotlib.animation as animation from optparse import OptionParser ##### ### Test files are in specific format: ### 'artist_name'-'song_name'_'start_time'_'duration'sec.wav ##### DEFAULT_FS = 44100 DEFAULT_WINDOW_SIZE = 4096 DEFAULT_OVERLAP_RATIO = 0.5 #FIELD_SONG_ID = 'song_id' FIELD_SONG_NAME = 'song_name' FIELD_CONFIDENCE = 'confidence' FIELD_QUERY_TIME = 'match_time' FIELD_OFFSET = 'offset' # Parse options usage = "usage: %prog [options] DEJAVU_PATH TEST_FOLDER" parser = OptionParser(usage=usage, version="%prog 1.1") parser.add_option("--no-log", action="store_false", dest="log", default=True, help='Disables logging' ) parser.add_option("--log-file", dest="log_file", default="results-compare.log", metavar="LOG_FILE", help='Set the path and filename of the log file' ) parser.add_option("--test-seconds", action="append", dest="test_seconds", default=[], metavar="Xsec", help='Appends seconds to test suit' ) parser.add_option("--results-folder", action="store", dest="results_folder", metavar="FOLDER", help='Sets the path where the results are saved' ) (options, args) = parser.parse_args() if len(args) != 2: parser.error("wrong number of arguments") if len(options.test_seconds) == 0: options.test_seconds = ['1sec','2sec','3sec','4sec','5sec','6sec','7sec','8sec','9sec','10sec'] if options.log == True: logging.basicConfig( filename=options.log_file, level=logging.DEBUG ) if options.results_folder != "" and options.results_folder[len(options.results_folder)-1] != '/': options.results_folder += "/" def log_msg(msg): if options.log == True: logging.debug(msg) class DejavuTest (object): def __init__(self, folder, seconds): super(DejavuTest, self).__init__() self.test_folder = folder self.test_seconds = seconds self.test_songs = [] self.test_files = [ f for f in listdir(self.test_folder) if isfile(join(self.test_folder,f)) and re.findall("[0-9]*sec",f)[0] in self.test_seconds ] self.n_columns = len(self.test_seconds) self.n_lines = len(self.test_files) / self.n_columns # variable match results (yes, no, invalid) self.result_match = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] # variable match precision (if matched in the corrected time) self.result_matching_times = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] # variable mahing time (query time) self.result_query_duration = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] # variable confidence self.result_match_confidence = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] self.begin() def get_column_id ( self,secs ): for i, sec in enumerate(self.test_seconds): if secs == sec: return i def get_line_id ( self,artist, song ): elem = artist + " - " + song for i, s in enumerate(self.test_songs): if elem == s: return i self.test_songs.append(elem) return len(self.test_songs)-1 def begin(self): for f in self.test_files: log_msg('--------------------------------------------------') log_msg('file: %s' % f) # get column col = self.get_column_id(re.findall("[0-9]*sec",f)[0]) # get artist and song artist = re.findall("^[^\-]+",f) artist = artist[0].rstrip() song = re.findall("\-[^\_]+",f) song = song[0].lstrip("- ") line = self.get_line_id ( artist, song) result = subprocess.check_output([args[0], 'recognize', 'file', self.test_folder+"/"+f]) log_msg('RESULT: %s' % result.strip() ) if result.strip() == "None": log_msg('No match') self.result_match[line][col] = 'no' self.result_matching_times[line][col] = 0 self.result_query_duration[line][col] = 0 self.result_match_confidence[line][col] = 0 else: result = result.strip() result = result.replace(" \'", ' "') result = result.replace("{\'", '{"') result = result.replace("\':", '":') result = result.replace("\',", '",') result = json.loads(result) artist_result = re.findall("^[^\-]+",result[FIELD_SONG_NAME]) artist_result = artist_result[0].rstrip() song_result = re.findall("\-[^\_]+",result[FIELD_SONG_NAME]) song_result = song_result[0].lstrip("- ") log_msg('artist: %s' % artist) log_msg('artist_result: %s' % artist_result) log_msg('song: %s' % song) log_msg('song_result: %s' % song_result) if artist_result != artist or song_result != song: log_msg('invalid match') self.result_match[line][col] = 'invalid' self.result_matching_times[line][col] = 0 self.result_query_duration[line][col] = 0 self.result_match_confidence[line][col] = 0 else: log_msg('correct match') self.result_match[line][col] = 'yes' self.result_query_duration[line][col] = round(result[FIELD_QUERY_TIME],3) self.result_match_confidence[line][col] = result[FIELD_CONFIDENCE] song_start_time = re.findall("\_[^\_]+",f) song_start_time = song_start_time[0].lstrip("_ ") #result_start_time = round((result[FIELD_SONG_DURATION] * result[FIELD_OFFSET]) / float(result[FIELD_SONG_SPEC_DURATION]), 0) result_start_time = round((result[FIELD_OFFSET] * DEFAULT_WINDOW_SIZE * DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS),0) self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time) if (abs(self.result_matching_times[line][col]) == 1): self.result_matching_times[line][col] = 0 log_msg('query duration: %s' % round(result[FIELD_QUERY_TIME],3)) log_msg('confidence: %s' % result[FIELD_CONFIDENCE]) log_msg('song start_time: %s' % song_start_time) log_msg('result start time: %s' % result_start_time) if (self.result_matching_times[line][col] == 0): log_msg('accurate match') else: log_msg('inaccurate match') log_msg('--------------------------------------------------\n') print "obtaining results from dejavu" log_msg('obtaining results from dejavu') tm = time.time() djv = DejavuTest(args[1], options.test_seconds) print "finished obtaining results from dejavu in %s" % (time.time() - tm) log_msg("finished obtaining results from dejavu in %s" % (time.time() - tm)) tests_n_lines = djv.n_lines tests_n_columns = djv.n_columns # len(options.test_seconds) tests = 1 # djv n_secs = len(options.test_seconds) # = tests.n_columns # set result variables -> 4d variables all_match_counter = [[[0 for x in xrange(tests)] for x in xrange(3)] for x in xrange(n_secs)] all_matching_times_counter = [[[0 for x in xrange(tests)] for x in xrange(2)] for x in xrange(n_secs)] all_query_duration = [[[0 for x in xrange(tests)] for x in xrange(tests_n_lines)] for x in xrange(n_secs)] all_match_confidence = [[[0 for x in xrange(tests)] for x in xrange(tests_n_lines)] for x in xrange(n_secs)] # agroup results by seconds for line in range(0, tests_n_lines): for col in range(0, tests_n_columns): # for dejavu all_query_duration[col][line][0] = djv.result_query_duration[line][col] all_match_confidence[col][line][0] = djv.result_match_confidence[line][col] djv_match_result = djv.result_match[line][col] if djv_match_result == 'yes': all_match_counter[col][0][0] += 1 elif djv_match_result == 'no': all_match_counter[col][1][0] += 1 else: all_match_counter[col][2][0] += 1 djv_match_acc = djv.result_matching_times[line][col] if djv_match_acc == 0 and djv_match_result == 'yes': all_matching_times_counter[col][0][0] += 1 elif djv_match_acc != 0: all_matching_times_counter[col][1][0] += 1 def autolabel(rects,ax): # attach some text labels for rect in rects: height = rect.get_height() ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%d'%int(height), ha='center', va='bottom') def autolabeldoubles(rects,ax): # attach some text labels for rect in rects: height = rect.get_height() ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%s'%round(float(height),3), ha='center', va='bottom') def create_plots(name,results): for sec in range(0,n_secs): ind = np.arange(tests_n_lines) # width = 0.25 # the width of the bars fig = plt.figure() ax = fig.add_subplot(111) ax.set_xlim([-1*width, 2*width]) means_dvj = [x[0] for x in results[sec]] rects1 = ax.bar(ind, means_dvj, width, color='r') # add some ax.set_ylabel(name) ax.set_title("%s %s Results" % (options.test_seconds[sec],name)) ax.set_xticks(ind+width) labels = [0 for x in range(0,tests_n_lines)] for x in range(0,tests_n_lines): labels[x] = "song %s" % (x+1) ax.set_xticklabels( labels ) box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) if name == 'Confidence': autolabel(rects1,ax) else: autolabeldoubles(rects1,ax) plt.grid() fig_name = "%s%s_%s.png" % (options.results_folder,name,options.test_seconds[sec]) fig.savefig(fig_name) create_plots('Confidence',all_match_confidence) create_plots('Query duration',all_query_duration) for sec in range(0,n_secs): ind = np.arange(3) # width = 0.25 # the width of the bars fig = plt.figure() ax = fig.add_subplot(111) ax.set_xlim([-1*width, 2.75]) means_dvj = [round(x[0]*100/tests_n_lines,1) for x in all_match_counter[sec]] rects1 = ax.bar(ind, means_dvj, width, color='r') # add some ax.set_ylabel('Matching Percentage') ax.set_title('%s Matching Percentage' % options.test_seconds[sec]) ax.set_xticks(ind+width) labels = ['yes','no','invalid'] ax.set_xticklabels( labels ) box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) autolabeldoubles(rects1,ax) plt.grid() fig_name = "%smatching_perc_%s.png" % (options.results_folder,options.test_seconds[sec]) fig.savefig(fig_name) for sec in range(0,n_secs): ind = np.arange(2) # width = 0.25 # the width of the bars fig = plt.figure() ax = fig.add_subplot(111) ax.set_xlim([-1*width, 1.75]) div = all_match_counter[sec][0][0] if div == 0 : div = 1000000 means_dvj = [round(x[0]*100/div,1) for x in all_matching_times_counter[sec]] rects1 = ax.bar(ind, means_dvj, width, color='r') # add some ax.set_ylabel('Matching Accuracy') ax.set_title('%s Matching Times Accuracy' % options.test_seconds[sec]) ax.set_xticks(ind+width) labels = ['yes','no'] ax.set_xticklabels( labels ) box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) autolabeldoubles(rects1,ax) plt.grid() fig_name = "%smatching_acc_%s.png" % (options.results_folder,options.test_seconds[sec]) fig.savefig(fig_name)