Merging in new test suite with modifications, mp3 example files to test on, and shell files for automation

This commit is contained in:
Will Drevo 2014-07-03 00:26:50 -04:00
parent 7cbb894501
commit a779ca4cf9
12 changed files with 522 additions and 414 deletions

View file

@ -122,6 +122,47 @@ Or by reading files via scripting functions:
>>> song = djv.recognize(FileRecognizer, "va_us_top_40/wav/07 - Mirrors - Justin Timberlake.wav")
```
## Testing (New!)
Test your Dejavu settings on a corpus of audio files on a number of different metrics:
* Confidence of match (number of fingerprints aligned)
* Offset matching accuracy
* Song matching accuracy
* Time to match
An example script is given in `test_dejavu.sh`, shown below:
```bash
#####################################
### Dejavu example testing script ###
#####################################
###########
# Clear out previous results
rm -rf ./results ./temp_audio
###########
# Fingerprint files of extension mp3 in the ./mp3 folder
python dejavu.py fingerprint ./mp3/ mp3
##########
# Run a test suite on the ./mp3 folder by extracting 1, 2, 3, 4, and 5
# second clips sampled randomly from within each song 8 seconds
# away from start or end, sampling offset with random seed = 42, and finally,
# store results in ./results and log to ./results/dejavu-test.log
python run_tests.py \
--secs 5 \
--temp ./temp_audio \
--log-file ./results/dejavu-test.log \
--padding 8 \
--seed 42 \
--results ./results \
./mp3
```
The testing scripts are, as of now, a bit rough and could certainly use some love and attention if you're interested in submitting a PR!
## How does it work?
The algorithm works off a fingerprint based system, much like:

270
dejavu/testing.py Normal file
View file

@ -0,0 +1,270 @@
from __future__ import division
from pydub import AudioSegment
from dejavu.decoder import path_to_songname
from dejavu import Dejavu
from dejavu.fingerprint import *
import traceback
import fnmatch
import os, re, ast
import subprocess
import random
import logging
def set_seed(seed=None):
    """
    Seeds Python's global `random` generator.

    `seed` as None means that the sampling will be random (the
    generator is left untouched).
    Setting your own seed means that you can produce the
    same experiment over and over.
    """
    # Identity test instead of `!= None`: it cannot be fooled by an
    # object with a custom __ne__, and a seed of 0 is still honoured.
    if seed is not None:
        random.seed(seed)
def get_files_recursive(src, fmt):
    """
    Yields the path of every file found recursively under `src`
    whose extension is `fmt`.

    `src` is the source directory.
    `fmt` is the extension, ie ".mp3" or "mp3", etc.
    """
    # Normalise ".mp3" and "mp3" to the same "*.mp3" pattern. Matching on
    # '*' + fmt would turn "mp3" into a bare suffix test and also pick up
    # names such as "notmp3". An empty `fmt` keeps matching every file.
    extension = fmt.lstrip(".")
    pattern = "*." + extension if extension else "*"
    for root, _dirnames, filenames in os.walk(src):
        for filename in fnmatch.filter(filenames, pattern):
            yield os.path.join(root, filename)
def get_length_audio(audiopath, extension):
    """
    Returns length of audio in whole seconds (the fraction is dropped).
    Returns None if format isn't supported or in case of error.

    `audiopath` is the path of the audio file.
    `extension` is its format, with or without the leading dot.
    """
    try:
        audio = AudioSegment.from_file(audiopath, extension.replace(".", ""))
    except Exception:
        # Best effort: report the failure and let the caller decide what to
        # do with None. `Exception` rather than a bare `except:` so that
        # Ctrl-C (KeyboardInterrupt) and SystemExit still stop the run.
        print("Error in get_length_audio(): %s" % traceback.format_exc())
        return None
    # the segment length is in milliseconds, hence the division
    return int(len(audio) / 1000.0)
def get_starttime(length, nseconds, padding):
    """
    Chooses the second at which a clip of `nseconds` should start.

    `length` is total audio length in seconds
    `nseconds` is amount of time to sample in seconds
    `padding` is off-limits seconds at beginning and ending

    Returns 0 when the padded window leaves no room for the clip;
    otherwise a random integer between `padding` and the latest start
    that still fits, both ends included.
    """
    latest_start = length - padding - nseconds
    return 0 if padding > latest_start else random.randint(padding, latest_start)
def generate_test_files(src, dest, nseconds, fmts=(".mp3", ".wav"), padding=10):
    """
    Generates a test file for each file recursively in `src` directory
    of given format using `nseconds` sampled from the audio file.

    Results are written to `dest` directory as
    `<original name>_<start second>_<nseconds>sec.<extension>`.

    `fmts` lists the extensions to look for.
    `padding` is the number of off-limit seconds at the beginning and
    end of a track that won't be sampled in testing. Often you want to
    avoid silence, etc.

    Files whose length cannot be determined are skipped. Raises
    subprocess.CalledProcessError if ffmpeg exits with an error.
    """
    # create directories if necessary (makedirs also copes with nested paths)
    for directory in (src, dest):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # find files recursively of a given file format
    for fmt in fmts:
        for audiosource in get_files_recursive(src, fmt):
            print("audiosource: %s" % audiosource)

            filename, extension = os.path.splitext(os.path.basename(audiosource))
            length = get_length_audio(audiosource, extension)
            if length is None:
                # get_length_audio() has already reported the problem; without
                # a length no start time can be picked, so skip this file
                # instead of crashing the whole run.
                continue
            starttime = get_starttime(length, nseconds, padding)

            test_file_name = "%s_%s_%ssec.%s" % (
                os.path.join(dest, filename), starttime,
                nseconds, extension.replace(".", ""))

            # -y overwrites a clip left over from a previous run
            subprocess.check_output([
                "ffmpeg", "-y",
                "-ss", "%d" % starttime,
                "-t", "%d" % nseconds,
                "-i", audiosource,
                test_file_name])
def log_msg(msg, log=True, silent=False):
    """
    Reports `msg`: it goes to the `logging` module at DEBUG level
    unless `log` is false, and to stdout unless `silent` is true.
    """
    if log:
        logging.debug(msg)
    if silent:
        return
    print(msg)
def autolabel(rects, ax):
    """
    Writes each bar's height, truncated to an integer, just above it.

    `rects` are the bars to annotate
    `ax` is the axes object the labels are drawn on
    """
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2.
        ax.text(x_center, 1.05 * bar_height, '%d' % int(bar_height),
                ha='center', va='bottom')
def autolabeldoubles(rects, ax):
    """
    Writes each bar's height, rounded to three decimals, just above it.

    `rects` are the bars to annotate
    `ax` is the axes object the labels are drawn on
    """
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2.
        caption = '%s' % round(float(bar_height), 3)
        ax.text(x_center, 1.05 * bar_height, caption,
                ha='center', va='bottom')
class DejavuTest(object):
    """
    Runs `python dejavu.py recognize file <clip>` on every clip in a
    folder and tabulates the outcome per song (line) and per clip
    length (column).

    Clip file names must look like
    `<song name>_<start second>_<N>sec.<extension>`; files that do not
    follow that pattern, or whose `<N>sec` is not in `seconds`, are
    ignored.

    `folder` is the directory holding the clips.
    `seconds` is the list of clip-length tags to test, e.g.
    ['1sec', '2sec'].

    All the work happens in the constructor, which fills four
    `n_lines` x `n_columns` tables:
      result_match            'yes', 'no' or 'invalid' (wrong song)
      result_matching_times   recognised start minus true start, in
                              seconds; an error of exactly 1s is
                              counted as 0
      result_query_duration   query time, 0 unless correctly matched
      result_match_confidence confidence, 0 unless correctly matched
    """

    # Tail of a clip's file name: "_<start second>_<N>sec.<extension>".
    # Anchored at the end so digits or "sec" inside the song name
    # ("second wind", "10 seconds") cannot be mistaken for the tag.
    _CLIP_SUFFIX = re.compile(r"_([0-9]+)_([0-9]+sec)\.[^.]+$")

    def __init__(self, folder, seconds):
        super(DejavuTest, self).__init__()

        self.test_folder = folder
        self.test_seconds = seconds
        self.test_songs = []

        print("test_seconds %s" % (self.test_seconds,))

        # _seconds_tag() is None for files that are not clips (stray or
        # hidden files), which simply fails the membership test.
        self.test_files = [
            f for f in os.listdir(self.test_folder)
            if os.path.isfile(os.path.join(self.test_folder, f))
            and self._seconds_tag(f) in self.test_seconds]

        print("test_files %s" % (self.test_files,))

        self.n_columns = len(self.test_seconds)
        # one line per song: each song is expected to have one clip per column
        self.n_lines = (len(self.test_files) // self.n_columns
                        if self.n_columns else 0)

        print("columns: %s" % self.n_columns)
        print("length of test files: %s" % len(self.test_files))
        print("lines: %s" % self.n_lines)

        # variable match results (yes, no, invalid)
        self.result_match = self._empty_table()

        print("result_match matrix: %s" % (self.result_match,))

        # variable match precision (if matched in the correct time)
        self.result_matching_times = self._empty_table()

        # variable matching time (query time)
        self.result_query_duration = self._empty_table()

        # variable confidence
        self.result_match_confidence = self._empty_table()

        self.begin()

    @classmethod
    def _seconds_tag(cls, filename):
        """Returns the `<N>sec` tag of a clip file name, or None if it has none."""
        match = cls._CLIP_SUFFIX.search(filename)
        return match.group(2) if match else None

    def _empty_table(self):
        """Returns a fresh n_lines x n_columns table of zeros."""
        return [[0 for _ in range(self.n_columns)] for _ in range(self.n_lines)]

    def get_column_id(self, secs):
        """Returns the column of the `secs` tag, or None if it is not tested."""
        for i, sec in enumerate(self.test_seconds):
            if secs == sec:
                return i

    def get_line_id(self, song):
        """Returns the line of `song`, giving it the next free line when first seen."""
        for i, s in enumerate(self.test_songs):
            if song == s:
                return i
        self.test_songs.append(song)
        return len(self.test_songs) - 1

    def create_plots(self, name, results, results_folder):
        """
        Saves one bar chart per tested clip length into `results_folder`
        as `<name>_<N>sec.png`.

        `name` is the metric name; it is used as axis label, in the title
        and in the file name.
        `results` is indexed [column][line][0].
        """
        for sec in range(0, len(self.test_seconds)):
            ind = np.arange(self.n_lines)
            width = 0.25  # the width of the bars

            fig = plt.figure()
            ax = fig.add_subplot(111)
            ax.set_xlim([-1 * width, 2 * width])

            means_dvj = [x[0] for x in results[sec]]
            rects1 = ax.bar(ind, means_dvj, width, color='r')

            # add some labelling
            ax.set_ylabel(name)
            ax.set_title("%s %s Results" % (self.test_seconds[sec], name))
            ax.set_xticks(ind + width)

            labels = ["song %s" % (x + 1) for x in range(0, self.n_lines)]
            ax.set_xticklabels(labels)

            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])

            if name == 'Confidence':
                autolabel(rects1, ax)
            else:
                autolabeldoubles(rects1, ax)

            plt.grid()

            fig_name = os.path.join(results_folder, "%s_%s.png" % (name, self.test_seconds[sec]))
            fig.savefig(fig_name)

    def begin(self):
        """Recognises every clip in `test_files` and fills the result tables."""
        for f in self.test_files:
            log_msg('--------------------------------------------------')
            log_msg('file: %s' % f)

            # get column
            col = self.get_column_id(self._seconds_tag(f))
            # format: XXXX_offset_length.mp3 -- split from the right so that
            # a song name containing "_" is kept whole
            song = path_to_songname(f).rsplit("_", 2)[0]
            line = self.get_line_id(song)
            result = subprocess.check_output(["python", "dejavu.py", 'recognize', 'file', self.test_folder + "/" + f])

            if result.strip() == "None":
                log_msg('No match')
                self.result_match[line][col] = 'no'
                self.result_matching_times[line][col] = 0
                self.result_query_duration[line][col] = 0
                self.result_match_confidence[line][col] = 0

            else:
                # the script prints a dict; swap its single-quote delimiters
                # for double quotes before evaluating it
                result = result.strip()
                result = result.replace(" \'", ' "')
                result = result.replace("{\'", '{"')
                result = result.replace("\':", '":')
                result = result.replace("\',", '",')

                # which song did we predict?
                result = ast.literal_eval(result)
                song_result = result["song_name"]
                log_msg('song: %s' % song)
                log_msg('song_result: %s' % song_result)

                if song_result != song:
                    log_msg('invalid match')
                    self.result_match[line][col] = 'invalid'
                    self.result_matching_times[line][col] = 0
                    self.result_query_duration[line][col] = 0
                    self.result_match_confidence[line][col] = 0
                else:
                    log_msg('correct match')
                    print(self.result_match)
                    self.result_match[line][col] = 'yes'
                    self.result_query_duration[line][col] = round(result[Dejavu.MATCH_TIME], 3)
                    self.result_match_confidence[line][col] = result[Dejavu.CONFIDENCE]

                    # true start second, as encoded in the clip's file name
                    song_start_time = self._CLIP_SUFFIX.search(f).group(1)

                    # reported offset converted to seconds
                    result_start_time = round((result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE *
                                               DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS), 0)

                    self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time)
                    # being one second off either way still counts as accurate
                    if abs(self.result_matching_times[line][col]) == 1:
                        self.result_matching_times[line][col] = 0

                    log_msg('query duration: %s' % round(result[Dejavu.MATCH_TIME], 3))
                    log_msg('confidence: %s' % result[Dejavu.CONFIDENCE])
                    log_msg('song start_time: %s' % song_start_time)
                    log_msg('result start time: %s' % result_start_time)
                    if self.result_matching_times[line][col] == 0:
                        log_msg('accurate match')
                    else:
                        log_msg('inaccurate match')
            log_msg('--------------------------------------------------\n')

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

2
mp3/about.txt Normal file
View file

@ -0,0 +1,2 @@
* Audio in this folder for testing dejavu is taken from here:
http://freemusicarchive.org/curator/creative_commons/

184
run_tests.py Normal file
View file

@ -0,0 +1,184 @@
from dejavu.testing import *
from dejavu import Dejavu
from optparse import OptionParser
import matplotlib.pyplot as plt
import time
import shutil
# Command line driver: cuts random clips out of every song in
# TESTING_AUDIOFOLDER, runs them through DejavuTest and saves bar charts
# of the results.
usage = "usage: %prog [options] TESTING_AUDIOFOLDER"
parser = OptionParser(usage=usage, version="%prog 1.1")
parser.add_option("--secs",
                  action="store",
                  dest="secs",
                  default=5,
                  type=int,
                  help='Number of seconds starting from zero to test')
parser.add_option("--results",
                  action="store",
                  dest="results_folder",
                  default="./dejavu_test_results",
                  help='Sets the path where the results are saved')
parser.add_option("--temp",
                  action="store",
                  dest="temp_folder",
                  default="./dejavu_temp_testing_files",
                  help='Sets the path where the temp files are saved')
parser.add_option("--log",
                  action="store_true",
                  dest="log",
                  default=True,
                  help='Enables logging')
# logging is on by default, so it needs a switch that turns it off
parser.add_option("--no-log",
                  action="store_false",
                  dest="log",
                  help='Disables logging')
# store_true: with store_false and a False default the flag did nothing
parser.add_option("--silent",
                  action="store_true",
                  dest="silent",
                  default=False,
                  help='Disables printing')
parser.add_option("--log-file",
                  dest="log_file",
                  default="results-compare.log",
                  help='Set the path and filename of the log file')
parser.add_option("--padding",
                  action="store",
                  dest="padding",
                  default=10,
                  type=int,
                  help='Number of seconds to pad choice of place to test from')
parser.add_option("--seed",
                  action="store",
                  dest="seed",
                  default=None,
                  type=int,
                  help='Random seed')
options, args = parser.parse_args()
if len(args) != 1:
    # prints the usage line and exits instead of dying on args[0]
    parser.error("wrong number of arguments")
test_folder = args[0]

# set random seed if set by user
set_seed(options.seed)

# ensure results folder exists
if not os.path.exists(options.results_folder):
    os.makedirs(options.results_folder)

# set logging
if options.log:
    # the log file may live in a folder that does not exist yet
    log_folder = os.path.dirname(options.log_file)
    if log_folder and not os.path.exists(log_folder):
        os.makedirs(log_folder)
    logging.basicConfig(filename=options.log_file, level=logging.DEBUG)

# set test seconds
test_seconds = ['%dsec' % i for i in range(1, options.secs + 1, 1)]

# generate testing files
for i in range(1, options.secs + 1, 1):
    generate_test_files(test_folder, options.temp_folder,
                        i, padding=options.padding)

# scan files
log_msg("Running Dejavu fingerprinter on files in %s..." % test_folder,
        log=options.log, silent=options.silent)

tm = time.time()
djv = DejavuTest(options.temp_folder, test_seconds)
log_msg("finished obtaining results from dejavu in %s" % (time.time() - tm),
        log=options.log, silent=options.silent)

tests = 1  # djv
n_secs = len(test_seconds)

# set result variables, all indexed [clip length][row][test]
# match counter rows: yes / no / invalid
all_match_counter = [[[0 for x in range(tests)] for x in range(3)] for x in range(n_secs)]
# matching times counter rows: accurate / inaccurate
all_matching_times_counter = [[[0 for x in range(tests)] for x in range(2)] for x in range(n_secs)]
all_query_duration = [[[0 for x in range(tests)] for x in range(djv.n_lines)] for x in range(n_secs)]
all_match_confidence = [[[0 for x in range(tests)] for x in range(djv.n_lines)] for x in range(n_secs)]

# group results by seconds
for line in range(0, djv.n_lines):
    for col in range(0, djv.n_columns):
        # for dejavu
        all_query_duration[col][line][0] = djv.result_query_duration[line][col]
        all_match_confidence[col][line][0] = djv.result_match_confidence[line][col]

        djv_match_result = djv.result_match[line][col]

        if djv_match_result == 'yes':
            all_match_counter[col][0][0] += 1
        elif djv_match_result == 'no':
            all_match_counter[col][1][0] += 1
        else:
            all_match_counter[col][2][0] += 1

        djv_match_acc = djv.result_matching_times[line][col]

        if djv_match_acc == 0 and djv_match_result == 'yes':
            all_matching_times_counter[col][0][0] += 1
        elif djv_match_acc != 0:
            all_matching_times_counter[col][1][0] += 1

# create plots
djv.create_plots('Confidence', all_match_confidence, options.results_folder)
djv.create_plots('Query duration', all_query_duration, options.results_folder)


def _save_percentage_chart(percentages, labels, xlim_right, ylabel, title, fig_name):
    """Draws `percentages` as one labelled red bar each and saves the chart to `fig_name`."""
    ind = np.arange(len(percentages))
    width = 0.25  # the width of the bars

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlim([-1 * width, xlim_right])

    rects1 = ax.bar(ind, percentages, width, color='r')

    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(ind + width)
    ax.set_xticklabels(labels)

    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])

    autolabeldoubles(rects1, ax)
    plt.grid()

    fig.savefig(fig_name)
    # release the figure; pyplot otherwise keeps every one of them alive
    plt.close(fig)


# share of clips per outcome; `or 1` avoids dividing by zero when no clip
# was tested (every counter is 0 in that case anyway)
n_tested = djv.n_lines or 1
for sec in range(0, n_secs):
    # 100.0 forces float division: without it Python 2 truncates the
    # percentage before round() ever sees the decimals
    means_dvj = [round(x[0] * 100.0 / n_tested, 1) for x in all_match_counter[sec]]
    _save_percentage_chart(
        means_dvj, ['yes', 'no', 'invalid'], 2.75,
        'Matching Percentage',
        '%s Matching Percentage' % test_seconds[sec],
        os.path.join(options.results_folder, "matching_perc_%s.png" % test_seconds[sec]))

# share of the correctly matched clips whose start time was also right
for sec in range(0, n_secs):
    # both counters are 0 when nothing matched, so any non-zero divisor works
    div = all_match_counter[sec][0][0] or 1
    means_dvj = [round(x[0] * 100.0 / div, 1) for x in all_matching_times_counter[sec]]
    _save_percentage_chart(
        means_dvj, ['yes', 'no'], 1.75,
        'Matching Accuracy',
        '%s Matching Times Accuracy' % test_seconds[sec],
        os.path.join(options.results_folder, "matching_acc_%s.png" % test_seconds[sec]))

# remove temporary folder
shutil.rmtree(options.temp_folder)

View file

@ -1,69 +0,0 @@
import os, subprocess
from os import listdir
from os.path import isfile, join
from optparse import OptionParser
# Command line tool: cuts clips of several lengths, all starting at the
# same second, out of every audio file directly inside SONGS_PATH and
# writes them to DESTINATION_FOLDER as
# <name>_<start>_<N>sec<extension>.
usage = "usage: %prog [options] SONGS_PATH DESTINATION_FOLDER"
parser = OptionParser(usage=usage, version="%prog 1.1")
parser.add_option("-s", "--start",
                  action="store",
                  dest="start_time",
                  type=int,
                  default=10,
                  metavar="X",
                  help='Test files begin on X sec of the original song')
parser.add_option("--test-seconds",
                  action="append",
                  dest="test_seconds",
                  type=int,
                  default=[],
                  metavar="X",
                  help='Sets the seconds of the test files')
parser.add_option("--audio-format",
                  action="append",
                  dest="audio_formats",
                  default=[],
                  metavar="FORMAT",
                  help='Sets audio formats of files to read')

(options, args) = parser.parse_args()

if len(args) != 2:
    parser.error("wrong number of arguments")
# os.path.join is used below, so neither path needs a trailing "/"
songs_path, destination_folder = args

# fall back to the defaults when an option was not given at all
if not options.test_seconds:
    options.test_seconds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
if not options.audio_formats:
    options.audio_formats = ['wav', 'mp3']

# ensure destination folder exists
if not os.path.exists(destination_folder):
    os.makedirs(destination_folder)

# only regular files whose extension (without the dot) was asked for
test_files = [f for f in listdir(songs_path)
              if isfile(join(songs_path, f))
              and os.path.splitext(f)[1][1:] in options.audio_formats]

for source_name in test_files:
    filename, extension = os.path.splitext(os.path.basename(source_name))
    for seconds in options.test_seconds:
        test_file_name = join(destination_folder, "%s_%s_%ssec%s" % (
            filename, options.start_time, seconds, extension))
        subprocess.check_output([
            "ffmpeg",
            "-ss", "%s" % options.start_time,
            "-t", "%s" % seconds,
            "-i", join(songs_path, source_name),
            test_file_name])

345
test.py
View file

@ -1,345 +0,0 @@
# result generator for dejavu
# TODO: Don't work very well with musics with special chars.
# use test file on the format below, with no special chars and only one "-" to separate artist from song
import os, subprocess, json, re, sys
import logging, time
from os import listdir
from os.path import isfile, join
import numpy as np
import matplotlib.pyplot as plt
from optparse import OptionParser
from dejavu.decoder import path_to_songname
import ast
#####
### Test files are in specific format:
### 'artist_name'-'song_name'_'start_time'_'duration'sec.wav
#####
# Constants used by DejavuTest.begin() to convert the offset reported by
# dejavu into seconds.
# NOTE(review): presumably these must mirror the values dejavu fingerprints
# with -- confirm against the fingerprinting module.
DEFAULT_FS = 44100
DEFAULT_WINDOW_SIZE = 4096
DEFAULT_OVERLAP_RATIO = 0.5
# Keys of the dictionary printed by `dejavu.py recognize file <clip>`.
FIELD_SONG_NAME = 'song_name'
FIELD_CONFIDENCE = 'confidence'
FIELD_QUERY_TIME = 'match_time'
FIELD_OFFSET = 'offset'
# Parse options
usage = "usage: %prog [options] DEJAVU_PATH TEST_FOLDER"
parser = OptionParser(usage=usage, version="%prog 1.1")
parser.add_option("--no-log",
action="store_false",
dest="log",
default=True,
help='Disables logging')
parser.add_option("--log-file",
dest="log_file",
default="results-compare.log",
metavar="LOG_FILE",
help='Set the path and filename of the log file')
parser.add_option("--test-seconds",
action="append",
dest="test_seconds",
default=[],
metavar="Xsec",
help='Appends seconds to test suit')
parser.add_option("--results-folder",
action="store",
dest="results_folder",
metavar="FOLDER",
help='Sets the path where the results are saved')
(options, args) = parser.parse_args()
# exactly two positional arguments: DEJAVU_PATH and TEST_FOLDER
if len(args) != 2:
parser.error("wrong number of arguments")
# no --test-seconds given: test clips of 1 to 10 seconds
if len(options.test_seconds) == 0:
options.test_seconds = ['1sec','2sec','3sec','4sec','5sec','6sec','7sec','8sec','9sec','10sec']
if options.log == True:
logging.basicConfig(filename=options.log_file, level=logging.DEBUG)
# The plots are saved to results_folder + file name, so the folder must end
# in "/".
# NOTE(review): --results-folder has no default; when it is omitted
# options.results_folder is None and len() below raises TypeError.
if options.results_folder != "" and options.results_folder[len(options.results_folder) - 1] != '/':
options.results_folder += "/"
# ensure results folder exists
try:
os.stat(options.results_folder)
except:
os.mkdir(options.results_folder)
def log_msg(msg):
    """Sends `msg` to the logging module at DEBUG level while `options.log` is on."""
    logging_enabled = options.log == True
    if not logging_enabled:
        return
    logging.debug(msg)
# Runs dejavu's recogniser on every clip found in `folder` and records the
# outcome in four tables indexed [line][column]: one line per song, one
# column per entry of `seconds` (tags such as '3sec'). All the work is done
# by the constructor, which ends by calling begin().
class DejavuTest (object):
def __init__(self, folder, seconds):
super(DejavuTest, self).__init__()
self.test_folder = folder
self.test_seconds = seconds
self.test_songs = []
# keep the files whose first "<digits>sec" match is one of the tested tags
# NOTE(review): findall(...)[0] raises IndexError for a file without "sec"
# in its name -- confirm the folder only ever holds generated clips.
self.test_files = [ f for f in listdir(self.test_folder) if isfile(join(self.test_folder,f))
and re.findall("[0-9]*sec",f)[0] in self.test_seconds ]
self.n_columns = len(self.test_seconds)
self.n_lines = len(self.test_files) / self.n_columns
# variable match results (yes, no, invalid)
self.result_match = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)]
print "columns:", self.n_columns
print "length of test files:", len(self.test_files)
print "lines:", self.n_lines
print "result_match matrix:", self.result_match
# variable match precision (if matched in the correct time)
self.result_matching_times = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)]
# variable matching time (query time)
self.result_query_duration = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)]
# variable confidence
self.result_match_confidence = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)]
self.begin()
# Returns the column index of the `secs` tag (None when it is not tested).
def get_column_id (self, secs):
for i, sec in enumerate(self.test_seconds):
if secs == sec:
return i
# Returns the line index of `song`, assigning the next free one on first use.
def get_line_id (self, song):
for i, s in enumerate(self.test_songs):
if song == s:
return i
self.test_songs.append(song)
return len(self.test_songs) - 1
# Recognises every clip and fills the result tables.
def begin(self):
for f in self.test_files:
log_msg('--------------------------------------------------')
log_msg('file: %s' % f)
# get column
col = self.get_column_id(re.findall("[0-9]*sec",f)[0])
song = path_to_songname(f).split("_")[0] # format: XXXX_offset_length.mp3
line = self.get_line_id (song)
# dejavu.py is looked up inside DEJAVU_PATH, the first positional argument
result = subprocess.check_output(["python", args[0] + "/dejavu.py", 'recognize', 'file', self.test_folder+"/"+f])
log_msg('RESULT: %s' % result.strip() )
if result.strip() == "None":
log_msg('No match')
self.result_match[line][col] = 'no'
self.result_matching_times[line][col] = 0
self.result_query_duration[line][col] = 0
self.result_match_confidence[line][col] = 0
else:
# swap the single-quote delimiters of the printed dict for double quotes
# before evaluating it
result = result.strip()
result = result.replace(" \'", ' "')
result = result.replace("{\'", '{"')
result = result.replace("\':", '":')
result = result.replace("\',", '",')
# which song did we predict?
result = ast.literal_eval(result)
print "result", result
song_result = result["song_name"]
log_msg('song: %s' % song)
log_msg('song_result: %s' % song_result)
if song_result != song:
log_msg('invalid match')
self.result_match[line][col] = 'invalid'
self.result_matching_times[line][col] = 0
self.result_query_duration[line][col] = 0
self.result_match_confidence[line][col] = 0
else:
log_msg('correct match')
print self.result_match
self.result_match[line][col] = 'yes'
self.result_query_duration[line][col] = round(result[FIELD_QUERY_TIME],3)
self.result_match_confidence[line][col] = result[FIELD_CONFIDENCE]
# true start second: the first "_"-delimited field of the file name
song_start_time = re.findall("\_[^\_]+",f)
song_start_time = song_start_time[0].lstrip("_ ")
#result_start_time = round((result[FIELD_SONG_DURATION] * result[FIELD_OFFSET]) / float(result[FIELD_SONG_SPEC_DURATION]), 0)
# reported offset converted to seconds
result_start_time = round((result[FIELD_OFFSET] * DEFAULT_WINDOW_SIZE * DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS),0)
self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time)
# being one second off either way still counts as accurate
if (abs(self.result_matching_times[line][col]) == 1):
self.result_matching_times[line][col] = 0
log_msg('query duration: %s' % round(result[FIELD_QUERY_TIME],3))
log_msg('confidence: %s' % result[FIELD_CONFIDENCE])
log_msg('song start_time: %s' % song_start_time)
log_msg('result start time: %s' % result_start_time)
if (self.result_matching_times[line][col] == 0):
log_msg('accurate match')
else:
log_msg('inaccurate match')
log_msg('--------------------------------------------------\n')
# Run the recogniser over TEST_FOLDER (the second positional argument);
# constructing DejavuTest does all the work.
print "obtaining results from dejavu"
log_msg('obtaining results from dejavu')
tm = time.time()
djv = DejavuTest(args[1], options.test_seconds)
print "finished obtaining results from dejavu in %s" % (time.time() - tm)
log_msg("finished obtaining results from dejavu in %s" % (time.time() - tm))
tests_n_lines = djv.n_lines
tests_n_columns = djv.n_columns # len(options.test_seconds)
tests = 1 # djv
n_secs = len(options.test_seconds) # = tests.n_columns
# set result variables, all indexed [clip length][row][test]
# match counter rows: yes / no / invalid; matching times rows: accurate / inaccurate
all_match_counter = [[[0 for x in xrange(tests)] for x in xrange(3)] for x in xrange(n_secs)]
all_matching_times_counter = [[[0 for x in xrange(tests)] for x in xrange(2)] for x in xrange(n_secs)]
all_query_duration = [[[0 for x in xrange(tests)] for x in xrange(tests_n_lines)] for x in xrange(n_secs)]
all_match_confidence = [[[0 for x in xrange(tests)] for x in xrange(tests_n_lines)] for x in xrange(n_secs)]
# group results by seconds
for line in range(0, tests_n_lines):
for col in range(0, tests_n_columns):
# for dejavu
all_query_duration[col][line][0] = djv.result_query_duration[line][col]
all_match_confidence[col][line][0] = djv.result_match_confidence[line][col]
djv_match_result = djv.result_match[line][col]
if djv_match_result == 'yes':
all_match_counter[col][0][0] += 1
elif djv_match_result == 'no':
all_match_counter[col][1][0] += 1
else:
all_match_counter[col][2][0] += 1
djv_match_acc = djv.result_matching_times[line][col]
# a zero time difference only counts as accurate for a correct match,
# because the other outcomes also store 0
if djv_match_acc == 0 and djv_match_result == 'yes':
all_matching_times_counter[col][0][0] += 1
elif djv_match_acc != 0:
all_matching_times_counter[col][1][0] += 1
def autolabel(rects, ax):
    """Writes each bar's height, truncated to an integer, just above it on `ax`."""
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2.
        ax.text(x_center, 1.05 * bar_height, '%d' % int(bar_height),
                ha='center', va='bottom')
def autolabeldoubles(rects, ax):
    """Writes each bar's height, rounded to three decimals, just above it on `ax`."""
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2.
        caption = '%s' % round(float(bar_height), 3)
        ax.text(x_center, 1.05 * bar_height, caption,
                ha='center', va='bottom')
def create_plots(name, results):
    """
    Saves one bar chart per tested clip length, with one bar per song.

    `name` is the metric name; it is used as axis label, in the title
    and in the file name.
    `results` is indexed [clip length][song][0].
    """
    for column in range(0, n_secs):
        positions = np.arange(tests_n_lines)
        bar_width = 0.25  # the width of the bars

        figure = plt.figure()
        axes = figure.add_subplot(111)
        axes.set_xlim([-1 * bar_width, 2 * bar_width])

        heights = [entry[0] for entry in results[column]]
        bars = axes.bar(positions, heights, bar_width, color='r')

        axes.set_ylabel(name)
        axes.set_title("%s %s Results" % (options.test_seconds[column], name))
        axes.set_xticks(positions + bar_width)

        song_labels = ["song %s" % (number + 1) for number in range(0, tests_n_lines)]
        axes.set_xticklabels(song_labels)

        # shrink the plot to three quarters of its width
        bounds = axes.get_position()
        axes.set_position([bounds.x0, bounds.y0, bounds.width * 0.75, bounds.height])

        # confidence is labelled as an integer, everything else as a decimal
        label_bars = autolabel if name == 'Confidence' else autolabeldoubles
        label_bars(bars, axes)

        plt.grid()

        target = "%s%s_%s.png" % (options.results_folder, name, options.test_seconds[column])
        figure.savefig(target)
# per-song charts of confidence and query duration, one per clip length
create_plots('Confidence',all_match_confidence)
create_plots('Query duration',all_query_duration)
# One chart per clip length: percentage of songs per outcome
# (yes / no / invalid).
# NOTE(review): both operands of the division are ints and this file has no
# `from __future__ import division`, so under Python 2 the percentage is
# truncated before round() is applied -- confirm whether that is intended.
for sec in range(0,n_secs):
ind = np.arange(3) #
width = 0.25 # the width of the bars
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlim([-1*width, 2.75])
means_dvj = [round(x[0]*100/tests_n_lines,1) for x in all_match_counter[sec]]
rects1 = ax.bar(ind, means_dvj, width, color='r')
# add some
ax.set_ylabel('Matching Percentage')
ax.set_title('%s Matching Percentage' % options.test_seconds[sec])
ax.set_xticks(ind+width)
labels = ['yes','no','invalid']
ax.set_xticklabels( labels )
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
#ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5))
autolabeldoubles(rects1,ax)
plt.grid()
fig_name = "%smatching_perc_%s.png" % (options.results_folder,options.test_seconds[sec])
fig.savefig(fig_name)
# One chart per clip length: of the correctly matched songs, the percentage
# whose start time was accurate (yes) or not (no).
for sec in range(0,n_secs):
ind = np.arange(2) #
width = 0.25 # the width of the bars
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlim([-1*width, 1.75])
div = all_match_counter[sec][0][0]
# no correct match at all: use a large divisor instead of dividing by zero
if div == 0 :
div = 1000000
means_dvj = [round(x[0]*100/div,1) for x in all_matching_times_counter[sec]]
rects1 = ax.bar(ind, means_dvj, width, color='r')
# add some
ax.set_ylabel('Matching Accuracy')
ax.set_title('%s Matching Times Accuracy' % options.test_seconds[sec])
ax.set_xticks(ind+width)
labels = ['yes','no']
ax.set_xticklabels( labels )
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
#ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5))
autolabeldoubles(rects1,ax)
plt.grid()
fig_name = "%smatching_acc_%s.png" % (options.results_folder,options.test_seconds[sec])
fig.savefig(fig_name)

25
test_dejavu.sh Normal file
View file

@ -0,0 +1,25 @@
#####################################
### Dejavu example testing script ###
#####################################
# All paths are relative: run this from the directory that contains
# dejavu.py, run_tests.py and the ./mp3 folder.
###########
# Clear out previous results
rm -rf ./results ./temp_audio
###########
# Fingerprint files of extension mp3 in the ./mp3 folder
python dejavu.py fingerprint ./mp3/ mp3
##########
# Run a test suite on the ./mp3 folder by extracting 1, 2, 3, 4, and 5
# second clips sampled randomly from within each song 8 seconds
# away from start or end, sampling offset with random seed = 42, and finally,
# store results in ./results and log to ./results/dejavu-test.log
python run_tests.py \
--secs 5 \
--temp ./temp_audio \
--log-file ./results/dejavu-test.log \
--padding 8 \
--seed 42 \
--results ./results \
./mp3