diff --git a/README.md b/README.md index df737a0..11e3fb4 100755 --- a/README.md +++ b/README.md @@ -122,6 +122,47 @@ Or by reading files via scripting functions: >>> song = djv.recognize(FileRecognizer, "va_us_top_40/wav/07 - Mirrors - Justin Timberlake.wav") ``` +## Testing (New!) + +Test your Dejavu settings on a corpus of audio files on a number of different metrics: + +* Confidence of match (number of fingerprints aligned) +* Offset matching accuracy +* Song matching accuracy +* Time to match + +An example script is given in `test_dejavu.sh`, shown below: + +```bash +##################################### +### Dejavu example testing script ### +##################################### + +########### +# Clear out previous results +rm -rf ./results ./temp_audio + +########### +# Fingerprint files of extension mp3 in the ./mp3 folder +python dejavu.py fingerprint ./mp3/ mp3 + +########## +# Run a test suite on the ./mp3 folder by extracting 1, 2, 3, 4, and 5 +# second clips sampled randomly from within each song 8 seconds +# away from start or end, sampling offset with random seed = 42, and finally, +# store results in ./results and log to ./results/dejavu-test.log +python run_tests.py \ + --secs 5 \ + --temp ./temp_audio \ + --log-file ./results/dejavu-test.log \ + --padding 8 \ + --seed 42 \ + --results ./results \ + ./mp3 +``` + +The testing scripts are, as of now, a bit rough, and could certainly use some love and attention if you're interested in submitting a PR! + ## How does it work? 
The algorithm works off a fingerprint based system, much like: diff --git a/dejavu/testing.py b/dejavu/testing.py new file mode 100644 index 0000000..d15c12f --- /dev/null +++ b/dejavu/testing.py @@ -0,0 +1,270 @@ +from __future__ import division +from pydub import AudioSegment +from dejavu.decoder import path_to_songname +from dejavu import Dejavu +from dejavu.fingerprint import * +import traceback +import fnmatch +import os, re, ast +import subprocess +import random +import logging + +def set_seed(seed=None): + """ + `seed` as None means that the sampling will be random. + + Setting your own seed means that you can produce the + same experiment over and over. + """ + if seed != None: + random.seed(seed) + +def get_files_recursive(src, fmt): + """ + `src` is the source directory. + `fmt` is the extension, ie ".mp3" or "mp3", etc. + """ + for root, dirnames, filenames in os.walk(src): + for filename in fnmatch.filter(filenames, '*' + fmt): + yield os.path.join(root, filename) + +def get_length_audio(audiopath, extension): + """ + Returns length of audio in seconds. + Returns None if format isn't supported or in case of error. + """ + try: + audio = AudioSegment.from_file(audiopath, extension.replace(".", "")) + except: + print "Error in get_length_audio(): %s" % traceback.format_exc() + return None + return int(len(audio) / 1000.0) + +def get_starttime(length, nseconds, padding): + """ + `length` is total audio length in seconds + `nseconds` is amount of time to sample in seconds + `padding` is off-limits seconds at beginning and ending + """ + maximum = length - padding - nseconds + if padding > maximum: + return 0 + return random.randint(padding, maximum) + +def generate_test_files(src, dest, nseconds, fmts=[".mp3", ".wav"], padding=10): + """ + Generates a test file for each file recursively in `src` directory + of given format using `nseconds` sampled from the audio file. + + Results are written to `dest` directory. 
+ + `padding` is the number of off-limit seconds and the beginning and + end of a track that won't be sampled in testing. Often you want to + avoid silence, etc. + """ + # create directories if necessary + for directory in [src, dest]: + try: + os.stat(directory) + except: + os.mkdir(directory) + + # find files recursively of a given file format + for fmt in fmts: + testsources = get_files_recursive(src, fmt) + for audiosource in testsources: + + print "audiosource:", audiosource + + filename, extension = os.path.splitext(os.path.basename(audiosource)) + length = get_length_audio(audiosource, extension) + starttime = get_starttime(length, nseconds, padding) + + test_file_name = "%s_%s_%ssec.%s" % ( + os.path.join(dest, filename), starttime, + nseconds, extension.replace(".", "")) + + subprocess.check_output([ + "ffmpeg", "-y", + "-ss", "%d" % starttime, + '-t' , "%d" % nseconds, + "-i", audiosource, + test_file_name]) + +def log_msg(msg, log=True, silent=False): + if log: + logging.debug(msg) + if not silent: + print msg + +def autolabel(rects, ax): + # attach some text labels + for rect in rects: + height = rect.get_height() + ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, + '%d' % int(height), ha='center', va='bottom') + +def autolabeldoubles(rects, ax): + # attach some text labels + for rect in rects: + height = rect.get_height() + ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, + '%s' % round(float(height), 3), ha='center', va='bottom') + +class DejavuTest(object): + def __init__(self, folder, seconds): + super(DejavuTest, self).__init__() + + self.test_folder = folder + self.test_seconds = seconds + self.test_songs = [] + + print "test_seconds", self.test_seconds + + self.test_files = [ + f for f in os.listdir(self.test_folder) + if os.path.isfile(os.path.join(self.test_folder, f)) + and re.findall("[0-9]*sec", f)[0] in self.test_seconds] + + print "test_files", self.test_files + + self.n_columns = len(self.test_seconds) + 
self.n_lines = int(len(self.test_files) / self.n_columns) + + print "columns:", self.n_columns + print "length of test files:", len(self.test_files) + print "lines:", self.n_lines + + # variable match results (yes, no, invalid) + self.result_match = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] + + print "result_match matrix:", self.result_match + + # variable match precision (if matched in the corrected time) + self.result_matching_times = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] + + # variable mahing time (query time) + self.result_query_duration = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] + + # variable confidence + self.result_match_confidence = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] + + self.begin() + + def get_column_id (self, secs): + for i, sec in enumerate(self.test_seconds): + if secs == sec: + return i + + def get_line_id (self, song): + for i, s in enumerate(self.test_songs): + if song == s: + return i + self.test_songs.append(song) + return len(self.test_songs) - 1 + + def create_plots(self, name, results, results_folder): + for sec in range(0, len(self.test_seconds)): + ind = np.arange(self.n_lines) # + width = 0.25 # the width of the bars + + fig = plt.figure() + ax = fig.add_subplot(111) + ax.set_xlim([-1 * width, 2 * width]) + + means_dvj = [x[0] for x in results[sec]] + rects1 = ax.bar(ind, means_dvj, width, color='r') + + # add some + ax.set_ylabel(name) + ax.set_title("%s %s Results" % (self.test_seconds[sec], name)) + ax.set_xticks(ind + width) + + labels = [0 for x in range(0, self.n_lines)] + for x in range(0, self.n_lines): + labels[x] = "song %s" % (x+1) + ax.set_xticklabels(labels) + + box = ax.get_position() + ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) + + #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) + + if name == 'Confidence': + autolabel(rects1, ax) + else: + 
autolabeldoubles(rects1, ax) + + plt.grid() + + fig_name = os.path.join(results_folder, "%s_%s.png" % (name, self.test_seconds[sec])) + fig.savefig(fig_name) + + def begin(self): + for f in self.test_files: + log_msg('--------------------------------------------------') + log_msg('file: %s' % f) + + # get column + col = self.get_column_id(re.findall("[0-9]*sec",f)[0]) + song = path_to_songname(f).split("_")[0] # format: XXXX_offset_length.mp3 + line = self.get_line_id (song) + result = subprocess.check_output(["python", "dejavu.py", 'recognize', 'file', self.test_folder + "/" + f]) + + if result.strip() == "None": + log_msg('No match') + self.result_match[line][col] = 'no' + self.result_matching_times[line][col] = 0 + self.result_query_duration[line][col] = 0 + self.result_match_confidence[line][col] = 0 + + else: + result = result.strip() + result = result.replace(" \'", ' "') + result = result.replace("{\'", '{"') + result = result.replace("\':", '":') + result = result.replace("\',", '",') + + # which song did we predict? 
+ result = ast.literal_eval(result) + song_result = result["song_name"] + log_msg('song: %s' % song) + log_msg('song_result: %s' % song_result) + + if song_result != song: + log_msg('invalid match') + self.result_match[line][col] = 'invalid' + self.result_matching_times[line][col] = 0 + self.result_query_duration[line][col] = 0 + self.result_match_confidence[line][col] = 0 + else: + log_msg('correct match') + print self.result_match + self.result_match[line][col] = 'yes' + self.result_query_duration[line][col] = round(result[Dejavu.MATCH_TIME],3) + self.result_match_confidence[line][col] = result[Dejavu.CONFIDENCE] + + song_start_time = re.findall("\_[^\_]+",f) + song_start_time = song_start_time[0].lstrip("_ ") + + result_start_time = round((result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE * + DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS), 0) + + self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time) + if (abs(self.result_matching_times[line][col]) == 1): + self.result_matching_times[line][col] = 0 + + log_msg('query duration: %s' % round(result[Dejavu.MATCH_TIME],3)) + log_msg('confidence: %s' % result[Dejavu.CONFIDENCE]) + log_msg('song start_time: %s' % song_start_time) + log_msg('result start time: %s' % result_start_time) + if (self.result_matching_times[line][col] == 0): + log_msg('accurate match') + else: + log_msg('inaccurate match') + log_msg('--------------------------------------------------\n') + + + + diff --git a/mp3/Brad-Sucks--Total-Breakdown.mp3 b/mp3/Brad-Sucks--Total-Breakdown.mp3 new file mode 100644 index 0000000..cd0f853 Binary files /dev/null and b/mp3/Brad-Sucks--Total-Breakdown.mp3 differ diff --git a/mp3/Choc--Eigenvalue-Subspace-Decomposition.mp3 b/mp3/Choc--Eigenvalue-Subspace-Decomposition.mp3 new file mode 100644 index 0000000..9261388 Binary files /dev/null and b/mp3/Choc--Eigenvalue-Subspace-Decomposition.mp3 differ diff --git a/mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3 
# run_tests.py
#
# Command-line driver for the Dejavu test suite: cuts 1..N second clips out of
# every track in TESTING_AUDIOFOLDER, runs the recognizer on them through
# DejavuTest and writes bar charts (confidence, query duration, matching
# percentage, offset accuracy) into the results folder.
from dejavu.testing import *
from dejavu import Dejavu
from optparse import OptionParser
import matplotlib.pyplot as plt
import numpy as np
import logging
import os
import time
import shutil

usage = "usage: %prog [options] TESTING_AUDIOFOLDER"
parser = OptionParser(usage=usage, version="%prog 1.1")
parser.add_option("--secs",
    action="store",
    dest="secs",
    default=5,
    type=int,
    help='Number of seconds starting from zero to test')
parser.add_option("--results",
    action="store",
    dest="results_folder",
    default="./dejavu_test_results",
    help='Sets the path where the results are saved')
parser.add_option("--temp",
    action="store",
    dest="temp_folder",
    default="./dejavu_temp_testing_files",
    help='Sets the path where the temp files are saved')
parser.add_option("--log",
    action="store_true",
    dest="log",
    default=True,
    help='Enables logging')
# Logging is on by default, so an explicit switch is needed to turn it off.
parser.add_option("--no-log",
    action="store_false",
    dest="log",
    help='Disables logging')
# store_true: with store_false and a False default the flag had no effect.
parser.add_option("--silent",
    action="store_true",
    dest="silent",
    default=False,
    help='Disables printing')
parser.add_option("--log-file",
    dest="log_file",
    default="results-compare.log",
    help='Set the path and filename of the log file')
parser.add_option("--padding",
    action="store",
    dest="padding",
    default=10,
    type=int,
    help='Number of seconds to pad choice of place to test from')
parser.add_option("--seed",
    action="store",
    dest="seed",
    default=None,
    type=int,
    help='Random seed')
options, args = parser.parse_args()

# exactly one positional argument: the folder holding the audio corpus
if len(args) != 1:
    parser.error("wrong number of arguments")
test_folder = args[0]

# set random seed if set by user
set_seed(options.seed)

# ensure results folder exists (the log file may live inside it)
if not os.path.exists(options.results_folder):
    os.makedirs(options.results_folder)

# set logging
if options.log:
    logging.basicConfig(filename=options.log_file, level=logging.DEBUG)

# set test seconds: labels '1sec' .. '<secs>sec'
clip_lengths = range(1, options.secs + 1)
test_seconds = ['%dsec' % i for i in clip_lengths]

# generate testing files, one clip per track and clip length
for i in clip_lengths:
    generate_test_files(test_folder, options.temp_folder,
                        i, padding=options.padding)

# scan files
log_msg("Running Dejavu fingerprinter on files in %s..." % test_folder,
        log=options.log, silent=options.silent)

tm = time.time()
djv = DejavuTest(options.temp_folder, test_seconds)
log_msg("finished obtaining results from dejavu in %s" % (time.time() - tm),
        log=options.log, silent=options.silent)

tests = 1  # only one engine (dejavu) is measured
n_secs = len(test_seconds)


def _zero_cube(n_rows):
    """Return an n_secs x n_rows x tests nested list filled with zeros."""
    return [[[0] * tests for _ in range(n_rows)] for _ in range(n_secs)]


def _percentages(counters, total):
    """
    Express each counter's first cell as a percentage of `total`.

    Uses float arithmetic so that values keep their one decimal under
    Python 2, and yields 0.0 for everything when `total` is 0.
    """
    if total == 0:
        return [0.0 for _ in counters]
    return [round(x[0] * 100.0 / total, 1) for x in counters]


def _save_bar_chart(values, labels, ylabel, title, fig_name):
    """Draw one red bar per value with the given tick labels and save it."""
    width = 0.25  # the width of the bars
    ind = np.arange(len(values))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # leaves 3 bar widths after the last bar: 2.75 for 3 bars, 1.75 for 2
    ax.set_xlim([-1 * width, len(values) - 1 + 3 * width])

    rects1 = ax.bar(ind, values, width, color='r')

    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(ind + width)
    ax.set_xticklabels(labels)

    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])

    autolabeldoubles(rects1, ax)
    plt.grid()

    fig.savefig(fig_name)
    # release the figure; otherwise every chart stays in memory
    plt.close(fig)


# set result variables, all indexed [clip length][row][engine]
all_match_counter = _zero_cube(3)            # rows: yes / no / invalid
all_matching_times_counter = _zero_cube(2)   # rows: accurate / inaccurate
all_query_duration = _zero_cube(djv.n_lines)
all_match_confidence = _zero_cube(djv.n_lines)

# group results by seconds
for line in range(0, djv.n_lines):
    for col in range(0, djv.n_columns):
        # for dejavu
        all_query_duration[col][line][0] = djv.result_query_duration[line][col]
        all_match_confidence[col][line][0] = djv.result_match_confidence[line][col]

        djv_match_result = djv.result_match[line][col]

        if djv_match_result == 'yes':
            all_match_counter[col][0][0] += 1
        elif djv_match_result == 'no':
            all_match_counter[col][1][0] += 1
        else:
            all_match_counter[col][2][0] += 1

        djv_match_acc = djv.result_matching_times[line][col]

        if djv_match_acc == 0 and djv_match_result == 'yes':
            all_matching_times_counter[col][0][0] += 1
        elif djv_match_acc != 0:
            all_matching_times_counter[col][1][0] += 1

# create plots
djv.create_plots('Confidence', all_match_confidence, options.results_folder)
djv.create_plots('Query duration', all_query_duration, options.results_folder)

# share of clips that matched / did not match / matched the wrong song
for sec in range(0, n_secs):
    _save_bar_chart(
        _percentages(all_match_counter[sec], djv.n_lines),
        ['yes', 'no', 'invalid'],
        'Matching Percentage',
        '%s Matching Percentage' % test_seconds[sec],
        os.path.join(options.results_folder,
                     "matching_perc_%s.png" % test_seconds[sec]))

# among correctly matched clips, share whose offset was (in)accurate
for sec in range(0, n_secs):
    _save_bar_chart(
        _percentages(all_matching_times_counter[sec],
                     all_match_counter[sec][0][0]),
        ['yes', 'no'],
        'Matching Accuracy',
        '%s Matching Times Accuracy' % test_seconds[sec],
        os.path.join(options.results_folder,
                     "matching_acc_%s.png" % test_seconds[sec]))

# remove temporary folder
shutil.rmtree(options.temp_folder)
- action="store", - dest="start_time", - type=int, - default=10, - metavar="X", - help='Test files begin on X sec of the original song' - ) - -parser.add_option("--test-seconds", - action="append", - dest="test_seconds", - type=int, - default=[], - metavar="X", - help='Sets the seconds of the test files' - ) - -parser.add_option("--audio-format", - action="append", - dest="audio_formats", - default=[], - metavar="FORMAT", - help='Sets audio formats of files to read' - ) - - -(options, args) = parser.parse_args() - -if len(args) != 2: - parser.error("wrong number of arguments") - -if args[0][len(args[0])-1] != "/": - args[0] += "/" - -if args[1][len(args[1])-1] != "/": - args[1] += "/" - -if len(options.test_seconds) == 0: - options.test_seconds = [1,2,3,4,5,6,7,8,9,10] - -if len(options.audio_formats) == 0: - options.audio_formats = ['wav','mp3'] - -try: - os.stat(args[1]) -except: - os.mkdir(args[1]) - -test_files = [ f for f in listdir(args[0]) if isfile(join(args[0],f)) and - os.path.splitext(f)[len(os.path.splitext(f))-1][1:] in options.audio_formats ] - -for file in test_files: - - filename = os.path.basename(file) - filename,extension = os.path.splitext(filename) - - for i in options.test_seconds: - - test_file_name = "%s%s_%s_%ssec%s" % (args[1],filename,options.start_time,i,extension) - subprocess.check_output(["ffmpeg", "-ss", "%s" % options.start_time, '-t' , "%s" % i, "-i", args[0]+file, test_file_name]) diff --git a/test.py b/test.py deleted file mode 100755 index b9e47a1..0000000 --- a/test.py +++ /dev/null @@ -1,345 +0,0 @@ -# result generator for dejavu - -# TODO: Don't work very well with musics with special chars. 
-# use test file on the format below, with no special chars and only one "-" to separate artist from song - -import os, subprocess, json, re, sys -import logging, time -from os import listdir -from os.path import isfile, join -import numpy as np -import matplotlib.pyplot as plt -from optparse import OptionParser -from dejavu.decoder import path_to_songname -import ast - -##### -### Test files are in specific format: -### 'artist_name'-'song_name'_'start_time'_'duration'sec.wav -##### - -DEFAULT_FS = 44100 -DEFAULT_WINDOW_SIZE = 4096 -DEFAULT_OVERLAP_RATIO = 0.5 - -FIELD_SONG_NAME = 'song_name' -FIELD_CONFIDENCE = 'confidence' -FIELD_QUERY_TIME = 'match_time' -FIELD_OFFSET = 'offset' - -# Parse options -usage = "usage: %prog [options] DEJAVU_PATH TEST_FOLDER" -parser = OptionParser(usage=usage, version="%prog 1.1") -parser.add_option("--no-log", - action="store_false", - dest="log", - default=True, - help='Disables logging') -parser.add_option("--log-file", - dest="log_file", - default="results-compare.log", - metavar="LOG_FILE", - help='Set the path and filename of the log file') -parser.add_option("--test-seconds", - action="append", - dest="test_seconds", - default=[], - metavar="Xsec", - help='Appends seconds to test suit') -parser.add_option("--results-folder", - action="store", - dest="results_folder", - metavar="FOLDER", - help='Sets the path where the results are saved') - -(options, args) = parser.parse_args() - -if len(args) != 2: - parser.error("wrong number of arguments") - -if len(options.test_seconds) == 0: - options.test_seconds = ['1sec','2sec','3sec','4sec','5sec','6sec','7sec','8sec','9sec','10sec'] - -if options.log == True: - logging.basicConfig(filename=options.log_file, level=logging.DEBUG) - -if options.results_folder != "" and options.results_folder[len(options.results_folder) - 1] != '/': - options.results_folder += "/" - -# ensure results folder exists -try: - os.stat(options.results_folder) -except: - os.mkdir(options.results_folder) - 
-def log_msg(msg): - if options.log == True: - logging.debug(msg) - -class DejavuTest (object): - def __init__(self, folder, seconds): - super(DejavuTest, self).__init__() - - self.test_folder = folder - self.test_seconds = seconds - self.test_songs = [] - self.test_files = [ f for f in listdir(self.test_folder) if isfile(join(self.test_folder,f)) - and re.findall("[0-9]*sec",f)[0] in self.test_seconds ] - self.n_columns = len(self.test_seconds) - self.n_lines = len(self.test_files) / self.n_columns - - # variable match results (yes, no, invalid) - self.result_match = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] - - print "columns:", self.n_columns - print "length of test files:", len(self.test_files) - print "lines:", self.n_lines - print "result_match matrix:", self.result_match - - # variable match precision (if matched in the corrected time) - self.result_matching_times = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] - - # variable mahing time (query time) - self.result_query_duration = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] - - # variable confidence - self.result_match_confidence = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] - - self.begin() - - def get_column_id (self, secs): - for i, sec in enumerate(self.test_seconds): - if secs == sec: - return i - - def get_line_id (self, song): - for i, s in enumerate(self.test_songs): - if song == s: - return i - self.test_songs.append(song) - return len(self.test_songs) - 1 - - def begin(self): - for f in self.test_files: - log_msg('--------------------------------------------------') - log_msg('file: %s' % f) - - # get column - col = self.get_column_id(re.findall("[0-9]*sec",f)[0]) - song = path_to_songname(f).split("_")[0] # format: XXXX_offset_length.mp3 - line = self.get_line_id (song) - result = subprocess.check_output(["python", args[0] + "/dejavu.py", 'recognize', 'file', self.test_folder+"/"+f]) - log_msg('RESULT: 
%s' % result.strip() ) - - if result.strip() == "None": - log_msg('No match') - self.result_match[line][col] = 'no' - self.result_matching_times[line][col] = 0 - self.result_query_duration[line][col] = 0 - self.result_match_confidence[line][col] = 0 - - else: - result = result.strip() - result = result.replace(" \'", ' "') - result = result.replace("{\'", '{"') - result = result.replace("\':", '":') - result = result.replace("\',", '",') - - # which song did we predict? - result = ast.literal_eval(result) - print "result", result - song_result = result["song_name"] - log_msg('song: %s' % song) - log_msg('song_result: %s' % song_result) - - if song_result != song: - log_msg('invalid match') - self.result_match[line][col] = 'invalid' - self.result_matching_times[line][col] = 0 - self.result_query_duration[line][col] = 0 - self.result_match_confidence[line][col] = 0 - else: - log_msg('correct match') - print self.result_match - self.result_match[line][col] = 'yes' - self.result_query_duration[line][col] = round(result[FIELD_QUERY_TIME],3) - self.result_match_confidence[line][col] = result[FIELD_CONFIDENCE] - - song_start_time = re.findall("\_[^\_]+",f) - song_start_time = song_start_time[0].lstrip("_ ") - - #result_start_time = round((result[FIELD_SONG_DURATION] * result[FIELD_OFFSET]) / float(result[FIELD_SONG_SPEC_DURATION]), 0) - result_start_time = round((result[FIELD_OFFSET] * DEFAULT_WINDOW_SIZE * DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS),0) - - self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time) - if (abs(self.result_matching_times[line][col]) == 1): - self.result_matching_times[line][col] = 0 - - log_msg('query duration: %s' % round(result[FIELD_QUERY_TIME],3)) - log_msg('confidence: %s' % result[FIELD_CONFIDENCE]) - log_msg('song start_time: %s' % song_start_time) - log_msg('result start time: %s' % result_start_time) - if (self.result_matching_times[line][col] == 0): - log_msg('accurate match') - else: - log_msg('inaccurate 
match') - log_msg('--------------------------------------------------\n') - -print "obtaining results from dejavu" -log_msg('obtaining results from dejavu') -tm = time.time() -djv = DejavuTest(args[1], options.test_seconds) -print "finished obtaining results from dejavu in %s" % (time.time() - tm) -log_msg("finished obtaining results from dejavu in %s" % (time.time() - tm)) - -tests_n_lines = djv.n_lines -tests_n_columns = djv.n_columns # len(options.test_seconds) -tests = 1 # djv -n_secs = len(options.test_seconds) # = tests.n_columns - -# set result variables -> 4d variables -all_match_counter = [[[0 for x in xrange(tests)] for x in xrange(3)] for x in xrange(n_secs)] -all_matching_times_counter = [[[0 for x in xrange(tests)] for x in xrange(2)] for x in xrange(n_secs)] -all_query_duration = [[[0 for x in xrange(tests)] for x in xrange(tests_n_lines)] for x in xrange(n_secs)] -all_match_confidence = [[[0 for x in xrange(tests)] for x in xrange(tests_n_lines)] for x in xrange(n_secs)] - -# agroup results by seconds -for line in range(0, tests_n_lines): - for col in range(0, tests_n_columns): - # for dejavu - all_query_duration[col][line][0] = djv.result_query_duration[line][col] - all_match_confidence[col][line][0] = djv.result_match_confidence[line][col] - - djv_match_result = djv.result_match[line][col] - - if djv_match_result == 'yes': - all_match_counter[col][0][0] += 1 - elif djv_match_result == 'no': - all_match_counter[col][1][0] += 1 - else: - all_match_counter[col][2][0] += 1 - - djv_match_acc = djv.result_matching_times[line][col] - - if djv_match_acc == 0 and djv_match_result == 'yes': - all_matching_times_counter[col][0][0] += 1 - elif djv_match_acc != 0: - all_matching_times_counter[col][1][0] += 1 - -def autolabel(rects,ax): - # attach some text labels - for rect in rects: - height = rect.get_height() - ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%d'%int(height), - ha='center', va='bottom') - -def autolabeldoubles(rects,ax): - # attach 
some text labels - for rect in rects: - height = rect.get_height() - ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%s'%round(float(height),3), - ha='center', va='bottom') - -def create_plots(name,results): - for sec in range(0,n_secs): - ind = np.arange(tests_n_lines) # - width = 0.25 # the width of the bars - - fig = plt.figure() - ax = fig.add_subplot(111) - ax.set_xlim([-1*width, 2*width]) - - means_dvj = [x[0] for x in results[sec]] - rects1 = ax.bar(ind, means_dvj, width, color='r') - - # add some - ax.set_ylabel(name) - ax.set_title("%s %s Results" % (options.test_seconds[sec],name)) - ax.set_xticks(ind+width) - - labels = [0 for x in range(0,tests_n_lines)] - for x in range(0,tests_n_lines): - labels[x] = "song %s" % (x+1) - ax.set_xticklabels( labels ) - - box = ax.get_position() - ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) - - #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) - - if name == 'Confidence': - autolabel(rects1,ax) - else: - autolabeldoubles(rects1,ax) - - plt.grid() - - fig_name = "%s%s_%s.png" % (options.results_folder,name,options.test_seconds[sec]) - fig.savefig(fig_name) - -create_plots('Confidence',all_match_confidence) -create_plots('Query duration',all_query_duration) - -for sec in range(0,n_secs): - ind = np.arange(3) # - width = 0.25 # the width of the bars - - fig = plt.figure() - ax = fig.add_subplot(111) - ax.set_xlim([-1*width, 2.75]) - - means_dvj = [round(x[0]*100/tests_n_lines,1) for x in all_match_counter[sec]] - rects1 = ax.bar(ind, means_dvj, width, color='r') - - # add some - ax.set_ylabel('Matching Percentage') - ax.set_title('%s Matching Percentage' % options.test_seconds[sec]) - ax.set_xticks(ind+width) - - labels = ['yes','no','invalid'] - ax.set_xticklabels( labels ) - - box = ax.get_position() - ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) - - #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) - 
autolabeldoubles(rects1,ax) - - plt.grid() - - fig_name = "%smatching_perc_%s.png" % (options.results_folder,options.test_seconds[sec]) - fig.savefig(fig_name) - -for sec in range(0,n_secs): - ind = np.arange(2) # - width = 0.25 # the width of the bars - - fig = plt.figure() - ax = fig.add_subplot(111) - ax.set_xlim([-1*width, 1.75]) - - div = all_match_counter[sec][0][0] - if div == 0 : - div = 1000000 - - means_dvj = [round(x[0]*100/div,1) for x in all_matching_times_counter[sec]] - rects1 = ax.bar(ind, means_dvj, width, color='r') - - # add some - ax.set_ylabel('Matching Accuracy') - ax.set_title('%s Matching Times Accuracy' % options.test_seconds[sec]) - ax.set_xticks(ind+width) - - labels = ['yes','no'] - ax.set_xticklabels( labels ) - - box = ax.get_position() - ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) - - #ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5)) - autolabeldoubles(rects1,ax) - - plt.grid() - - fig_name = "%smatching_acc_%s.png" % (options.results_folder,options.test_seconds[sec]) - fig.savefig(fig_name) diff --git a/test_dejavu.sh b/test_dejavu.sh new file mode 100644 index 0000000..7ccdd93 --- /dev/null +++ b/test_dejavu.sh @@ -0,0 +1,25 @@ +##################################### +### Dejavu example testing script ### +##################################### + +########### +# Clear out previous results +rm -rf ./results ./temp_audio + +########### +# Fingerprint files of extension mp3 in the ./mp3 folder +python dejavu.py fingerprint ./mp3/ mp3 + +########## +# Run a test suite on the ./mp3 folder by extracting 1, 2, 3, 4, and 5 +# second clips sampled randomly from within each song 8 seconds +# away from start or end, sampling with random seed = 42, and finally +# store results in ./results and log to dejavu-test.log +python run_tests.py \ + --secs 5 \ + --temp ./temp_audio \ + --log-file ./results/dejavu-test.log \ + --padding 8 \ + --seed 42 \ + --results ./results \ + ./mp3 \ No newline at end 
of file