From 6a6ae94e3d2c5aee5b6cf0f60d26c4f0b4276173 Mon Sep 17 00:00:00 2001 From: Vin Date: Mon, 16 Dec 2013 23:12:50 +0000 Subject: [PATCH 01/10] Refactored the fingerprint module --- dejavu/fingerprint.py | 221 ++++++++++++++++++++++-------------------- 1 file changed, 116 insertions(+), 105 deletions(-) mode change 100644 => 100755 dejavu/fingerprint.py diff --git a/dejavu/fingerprint.py b/dejavu/fingerprint.py old mode 100644 new mode 100755 index d1f78cc..41451f3 --- a/dejavu/fingerprint.py +++ b/dejavu/fingerprint.py @@ -13,19 +13,117 @@ import time import hashlib import pickle +IDX_FREQ_I = 0 +IDX_TIME_J = 1 + +DEFAULT_FS = 44100 +DEFAULT_WINDOW_SIZE = 4096 +DEFAULT_OVERLAP_RATIO = 0.5 +DEFAULT_FAN_VALUE = 15 + +DEFAULT_AMP_MIN = 10 +PEAK_NEIGHBORHOOD_SIZE = 20 +MIN_HASH_TIME_DELTA = 0 + +def fingerprint(channel_samples, + Fs=DEFAULT_FS, + wsize=DEFAULT_WINDOW_SIZE, + wratio=DEFAULT_OVERLAP_RATIO, + fan_value=DEFAULT_FAN_VALUE, + amp_min=DEFAULT_AMP_MIN): + """ + FFT the channel, log transform output, find local maxima, then return + locally sensitive hashes. + """ + # FFT the signal and extract frequency components + arr2D = mlab.specgram( + channel_samples, + NFFT=wsize, + Fs=Fs, + window=mlab.window_hanning, + noverlap=int(wsize * wratio))[0] + + # apply log transform since specgram() returns linear array + arr2D = 10 * np.log10(arr2D) + arr2D[arr2D == -np.inf] = 0 # replace infs with zeros + + # find local maxima + local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min) + + # return hashes + return generate_hashes(local_maxima, fan_value=fan_value) + +def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN): + + # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure + struct = generate_binary_structure(2, 1) + neighborhood = iterate_structure(struct, PEAK_NEIGHBORHOOD_SIZE) + + # find local maxima using our fliter shape + local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D + background = (arr2D == 0) + eroded_background = binary_erosion(background, structure=neighborhood, border_value=1) + detected_peaks = local_max - eroded_background # this is a boolean mask of arr2D with True at peaks + + # extract peaks + amps = arr2D[detected_peaks] + j, i = np.where(detected_peaks) + + # filter peaks + amps = amps.flatten() + peaks = zip(i, j, amps) + peaks_filtered = [x for x in peaks if x[2] > amp_min] # freq, time, amp + + # get indices for frequency and time + frequency_idx = [x[1] for x in peaks_filtered] + time_idx = [x[0] for x in peaks_filtered] + + if plot: + # scatter of the peaks + fig, ax = plt.subplots() + ax.imshow(arr2D) + ax.scatter(time_idx, frequency_idx) + ax.set_xlabel('Time') + ax.set_ylabel('Frequency') + ax.set_title("Spectrogram of \"Blurred Lines\" by Robin Thicke"); + plt.gca().invert_yaxis() + plt.show() + + return zip(frequency_idx, time_idx) + +def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE): + """ + Hash list structure: + sha1-hash[0:20] time_offset + [(e05b341a9b77a51fd26, 32), ... ] + """ + fingerprinted = set() # to avoid rehashing same pairs + hashes = [] + + for i in range(len(peaks)): + for j in range(fan_value): + if i+j < len(peaks) and not (i, i+j) in fingerprinted: + + freq1 = peaks[i][IDX_FREQ_I] + freq2 = peaks[i+j][IDX_FREQ_I] + t1 = peaks[i][IDX_TIME_J] + t2 = peaks[i+j][IDX_TIME_J] + t_delta = t2 - t1 + + if t_delta >= MIN_HASH_TIME_DELTA: + h = hashlib.sha1("%s|%s|%s" % (str(freq1), str(freq2), str(t_delta))) + hashes.append((h.hexdigest()[0:20], t1)) + + # ensure we don't repeat hashing + fingerprinted.add((i, i+j)) + return hashes + +# TODO: move all of the below to a class with DB access + + class Fingerprinter(): - IDX_FREQ_I = 0 - IDX_TIME_J = 1 - - DEFAULT_FS = 44100 - DEFAULT_WINDOW_SIZE = 4096 - DEFAULT_OVERLAP_RATIO = 0.5 - DEFAULT_FAN_VALUE = 15 - - DEFAULT_AMP_MIN = 10 - PEAK_NEIGHBORHOOD_SIZE = 20 - MIN_HASH_TIME_DELTA = 0 + def __init__(self, config, Fs=DEFAULT_FS, @@ -55,104 +153,15 @@ class Fingerprinter(): hashes = self.process_channel(samples, song_id=sid) print "Generated %d hashes" % len(hashes) self.db.insert_hashes(hashes) - + + # TODO: put this in another module def match(self, samples): """Used for matching unknown songs""" hashes = self.process_channel(samples) matches = self.db.return_matches(hashes) return matches - def process_channel(self, channel_samples, song_id=None): - """ - FFT the channel, log transform output, find local maxima, then return - locally sensitive hashes. - """ - # FFT the signal and extract frequency components - arr2D = mlab.specgram( - channel_samples, - NFFT=self.window_size, - Fs=self.Fs, - window=mlab.window_hanning, - noverlap=self.noverlap)[0] - - # apply log transform since specgram() returns linear array - arr2D = 10 * np.log10(arr2D) - arr2D[arr2D == -np.inf] = 0 # replace infs with zeros - - # find local maxima - local_maxima = self.get_2D_peaks(arr2D, plot=False) - - # return hashes - return self.generate_hashes(local_maxima, song_id=song_id) - - def get_2D_peaks(self, arr2D, plot=False): - - # http://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.morphology.iterate_structure.html#scipy.ndimage.morphology.iterate_structure - struct = generate_binary_structure(2, 1) - neighborhood = iterate_structure(struct, Fingerprinter.PEAK_NEIGHBORHOOD_SIZE) - - # find local maxima using our fliter shape - local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D - background = (arr2D == 0) - eroded_background = binary_erosion(background, structure=neighborhood, border_value=1) - detected_peaks = local_max - eroded_background # this is a boolean mask of arr2D with True at peaks - - # extract peaks - amps = arr2D[detected_peaks] - j, i = np.where(detected_peaks) - - # filter peaks - amps = amps.flatten() - peaks = zip(i, j, amps) - peaks_filtered = [x for x in peaks if x[2] > self.amp_min] # freq, time, amp - - # get indices for frequency and time - frequency_idx = [x[1] for x in peaks_filtered] - time_idx = [x[0] for x in peaks_filtered] - - if plot: - # scatter of the peaks - fig, ax = plt.subplots() - ax.imshow(arr2D) - ax.scatter(time_idx, frequency_idx) - ax.set_xlabel('Time') - ax.set_ylabel('Frequency') - ax.set_title("Spectrogram of \"Blurred Lines\" by Robin Thicke"); - plt.gca().invert_yaxis() - plt.show() - - return zip(frequency_idx, time_idx) - - def generate_hashes(self, peaks, song_id=None): - """ - Hash list structure: - sha1-hash[0:20] song_id, time_offset - [(e05b341a9b77a51fd26, (3, 32)), ... ] - """ - fingerprinted = set() # to avoid rehashing same pairs - hashes = [] - - for i in range(len(peaks)): - for j in range(self.fan_value): - if i+j < len(peaks) and not (i, i+j) in fingerprinted: - - freq1 = peaks[i][Fingerprinter.IDX_FREQ_I] - freq2 = peaks[i+j][Fingerprinter.IDX_FREQ_I] - t1 = peaks[i][Fingerprinter.IDX_TIME_J] - t2 = peaks[i+j][Fingerprinter.IDX_TIME_J] - t_delta = t2 - t1 - - if t_delta >= Fingerprinter.MIN_HASH_TIME_DELTA: - h = hashlib.sha1("%s|%s|%s" % (str(freq1), str(freq2), str(t_delta))) - hashes.append((h.hexdigest()[0:20], (song_id, t1))) - - # ensure we don't repeat hashing - fingerprinted.add((i, i+j)) - return hashes - - def insert_into_db(self, key, value): - self.db.insert(key, value) - + # TODO: this function has nothing to do with fingerprinting. is it needed? def print_stats(self): iterable = self.db.get_iterable_kv_pairs() @@ -168,10 +177,12 @@ class Fingerprinter(): for song_id, count in counter.iteritems(): song_name = self.song_names[song_id] print "%s has %d spectrogram peaks" % (song_name, count) - + + # this does... what? this seems to only be used for the above function def set_song_names(self, wpaths): self.song_names = wpaths - + + # TODO: put this in another module def align_matches(self, matches, starttime, record_seconds=0, verbose=False): """ Finds hash matches that align in time with other matches and finds From a4ed61265889b63bd9bd76370f847dc50c5c341c Mon Sep 17 00:00:00 2001 From: Vin Date: Mon, 16 Dec 2013 23:38:58 +0000 Subject: [PATCH 02/10] Moved main Dejavu class from dejavu.control to dejavu --- dejavu/__init__.py | 176 ++++++++++++++++++++++++++++++++++++++++++ dejavu/control.py | 107 ------------------------- dejavu/fingerprint.py | 58 +------------- go.py | 2 +- 4 files changed, 178 insertions(+), 165 deletions(-) mode change 100644 => 100755 dejavu/__init__.py delete mode 100644 dejavu/control.py mode change 100644 => 100755 go.py diff --git a/dejavu/__init__.py b/dejavu/__init__.py old mode 100644 new mode 100755 index e69de29..b44897d --- a/dejavu/__init__.py +++ b/dejavu/__init__.py @@ -0,0 +1,176 @@ +from dejavu.database import SQLDatabase +from dejavu.convert import Converter +import dejavu.fingerprint as fingerprint +from scipy.io import wavfile +from multiprocessing import Process +import wave, os +import random + +DEBUG = False + +class Dejavu(): + + def __init__(self, config): + + self.config = config + + # initialize db + database = SQLDatabase( + self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_HOSTNAME), + self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_USERNAME), + self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_PASSWORD), + self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_DATABASE)) + self.db = database + + # create components + self.converter = Converter() + #self.fingerprinter = Fingerprinter(self.config) + self.db.setup() + + # get songs previously indexed + self.songs = self.db.get_songs() + self.songnames_set = set() # to know which ones we've computed before + if self.songs: + for song in self.songs: + song_id = song[SQLDatabase.FIELD_SONG_ID] + song_name = song[SQLDatabase.FIELD_SONGNAME] + self.songnames_set.add(song_name) + print "Added: %s to the set of fingerprinted songs..." % song_name + + def chunkify(self, lst, n): + """ + Splits a list into roughly n equal parts. + http://stackoverflow.com/questions/2130016/splitting-a-list-of-arbitrary-size-into-only-roughly-n-equal-parts + """ + return [lst[i::n] for i in xrange(n)] + + def fingerprint(self, path, output, extensions, nprocesses): + + # convert files, shuffle order + files = self.converter.find_files(path, extensions) + random.shuffle(files) + files_split = self.chunkify(files, nprocesses) + + # split into processes here + processes = [] + for i in range(nprocesses): + + # need database instance since mysql connections shouldn't be shared across processes + sql_connection = SQLDatabase( + self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_HOSTNAME), + self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_USERNAME), + self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_PASSWORD), + self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_DATABASE)) + + # create process and start it + p = Process(target=self.fingerprint_worker, args=(files_split[i], sql_connection, output)) + p.start() + processes.append(p) + + # wait for all processes to complete + for p in processes: + p.join() + + # delete orphans + # print "Done fingerprinting. Deleting orphaned fingerprints..." + # TODO: need a more performant query in database.py for the + #self.fingerprinter.db.delete_orphans() + + def fingerprint_worker(self, files, sql_connection, output): + + for filename, extension in files: + + # if there are already fingerprints in database, don't re-fingerprint or convert + song_name = os.path.basename(filename).split(".")[0] + if DEBUG and song_name in self.songnames_set: + print("-> Already fingerprinted, continuing...") + continue + + # convert to WAV + wavout_path = self.converter.convert(filename, extension, Converter.WAV, output, song_name) + + # insert song name into database + song_id = sql_connection.insert_song(song_name) + + # for each channel perform FFT analysis and fingerprinting + channels, Fs = self.extract_channels(wavout_path) + for c in range(len(channels)): + channel = channels[c] + print "-> Fingerprinting channel %d of song %s..." % (c+1, song_name) + hashes = fingerprint.fingerprint(channel, Fs=Fs) + sql_connection.insert_hashes(song_id, hashes) + + # only after done fingerprinting do confirm + sql_connection.set_song_fingerprinted(song_id) + + def extract_channels(self, path): + """ + Reads channels from disk. + Returns a tuple with (channels, sample_rate) + """ + channels = [] + Fs, frames = wavfile.read(path) + wave_object = wave.open(path) + nchannels, sampwidth, framerate, num_frames, comptype, compname = wave_object.getparams() + #assert Fs == self.fingerprinter.Fs + + for channel in range(nchannels): + channels.append(frames[:, channel]) + return (channels, Fs) + + def match(self, samples, Fs=fingerprint.DEFAULT_FS): + hashes = fingerprint.fingerprint(samples, Fs=Fs) + return self.db.return_matches(hashes) + + def align_matches(self, matches, starttime, record_seconds=None): + """ + Finds hash matches that align in time with other matches and finds + consensus about which hashes are "true" signal from the audio. + + Returns a dictionary with match information. + """ + # align by diffs + diff_counter = {} + largest = 0 + largest_count = 0 + song_id = -1 + for tup in matches: + sid, diff = tup + if not diff in diff_counter: + diff_counter[diff] = {} + if not sid in diff_counter[diff]: + diff_counter[diff][sid] = 0 + diff_counter[diff][sid] += 1 + + if diff_counter[diff][sid] > largest_count: + largest = diff + largest_count = diff_counter[diff][sid] + song_id = sid + + if DEBUG: + print("Diff is %d with %d offset-aligned matches" % (largest, largest_count)) + + # extract idenfication + song = self.db.get_song_by_id(song_id) + if song: + songname = song.get(SQLDatabase.FIELD_SONGNAME, None) + else: + return None + songname = songname.replace("_", " ") + elapsed = time.time() - starttime + + if DEBUG: + print("Song is %s (song ID = %d) identification took %f seconds" % (songname, song_id, elapsed)) + + # return match info + song = { + "song_id" : song_id, + "song_name" : songname, + "match_time" : elapsed, + "confidence" : largest_count + } + + if record_seconds: + song['record_time'] = record_seconds + + return song \ No newline at end of file diff --git a/dejavu/control.py b/dejavu/control.py deleted file mode 100644 index 606e366..0000000 --- a/dejavu/control.py +++ /dev/null @@ -1,107 +0,0 @@ -from dejavu.database import SQLDatabase -from dejavu.convert import Converter -from dejavu.fingerprint import Fingerprinter -from scipy.io import wavfile -from multiprocessing import Process -import wave, os -import random - -class Dejavu(): - - def __init__(self, config): - - self.config = config - - # create components - self.converter = Converter() - self.fingerprinter = Fingerprinter(self.config) - self.fingerprinter.db.setup() - - # get songs previously indexed - self.songs = self.fingerprinter.db.get_songs() - self.songnames_set = set() # to know which ones we've computed before - if self.songs: - for song in self.songs: - song_id = song[SQLDatabase.FIELD_SONG_ID] - song_name = song[SQLDatabase.FIELD_SONGNAME] - self.songnames_set.add(song_name) - print "Added: %s to the set of fingerprinted songs..." % song_name - - def chunkify(self, lst, n): - """ - Splits a list into roughly n equal parts. - http://stackoverflow.com/questions/2130016/splitting-a-list-of-arbitrary-size-into-only-roughly-n-equal-parts - """ - return [lst[i::n] for i in xrange(n)] - - def fingerprint(self, path, output, extensions, nprocesses): - - # convert files, shuffle order - files = self.converter.find_files(path, extensions) - random.shuffle(files) - files_split = self.chunkify(files, nprocesses) - - # split into processes here - processes = [] - for i in range(nprocesses): - - # need database instance since mysql connections shouldn't be shared across processes - sql_connection = SQLDatabase( - self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_HOSTNAME), - self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_USERNAME), - self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_PASSWORD), - self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_DATABASE)) - - # create process and start it - p = Process(target=self.fingerprint_worker, args=(files_split[i], sql_connection, output)) - p.start() - processes.append(p) - - # wait for all processes to complete - for p in processes: - p.join() - - # delete orphans - # print "Done fingerprinting. Deleting orphaned fingerprints..." - # TODO: need a more performant query in database.py for the - #self.fingerprinter.db.delete_orphans() - - def fingerprint_worker(self, files, sql_connection, output): - - for filename, extension in files: - - # if there are already fingerprints in database, don't re-fingerprint or convert - song_name = os.path.basename(filename).split(".")[0] - if song_name in self.songnames_set: - print "-> Already fingerprinted, continuing..." - continue - - # convert to WAV - wavout_path = self.converter.convert(filename, extension, Converter.WAV, output, song_name) - - # insert song name into database - song_id = sql_connection.insert_song(song_name) - - # for each channel perform FFT analysis and fingerprinting - channels = self.extract_channels(wavout_path) - for c in range(len(channels)): - channel = channels[c] - print "-> Fingerprinting channel %d of song %s..." % (c+1, song_name) - self.fingerprinter.fingerprint(channel, wavout_path, song_id, c+1) - - # only after done fingerprinting do confirm - sql_connection.set_song_fingerprinted(song_id) - - def extract_channels(self, path): - """ - Reads channels from disk. - """ - channels = [] - Fs, frames = wavfile.read(path) - wave_object = wave.open(path) - nchannels, sampwidth, framerate, num_frames, comptype, compname = wave_object.getparams() - assert Fs == self.fingerprinter.Fs - - for channel in range(nchannels): - channels.append(frames[:, channel]) - return channels \ No newline at end of file diff --git a/dejavu/fingerprint.py b/dejavu/fingerprint.py index 41451f3..6108799 100755 --- a/dejavu/fingerprint.py +++ b/dejavu/fingerprint.py @@ -133,12 +133,7 @@ class Fingerprinter(): amp_min=DEFAULT_AMP_MIN): self.config = config - database = SQLDatabase( - self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_HOSTNAME), - self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_USERNAME), - self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_PASSWORD), - self.config.get(SQLDatabase.CONNECTION, SQLDatabase.KEY_DATABASE)) - self.db = database + self.Fs = Fs self.dt = 1.0 / self.Fs @@ -183,55 +178,4 @@ class Fingerprinter(): self.song_names = wpaths # TODO: put this in another module - def align_matches(self, matches, starttime, record_seconds=0, verbose=False): - """ - Finds hash matches that align in time with other matches and finds - consensus about which hashes are "true" signal from the audio. - - Returns a dictionary with match information. - """ - # align by diffs - diff_counter = {} - largest = 0 - largest_count = 0 - song_id = -1 - for tup in matches: - sid, diff = tup - if not diff in diff_counter: - diff_counter[diff] = {} - if not sid in diff_counter[diff]: - diff_counter[diff][sid] = 0 - diff_counter[diff][sid] += 1 - if diff_counter[diff][sid] > largest_count: - largest = diff - largest_count = diff_counter[diff][sid] - song_id = sid - - if verbose: - print "Diff is %d with %d offset-aligned matches" % (largest, largest_count) - - # extract idenfication - song = self.db.get_song_by_id(song_id) - if song: - songname = song.get(SQLDatabase.FIELD_SONGNAME, None) - else: - return None - songname = songname.replace("_", " ") - elapsed = time.time() - starttime - - if verbose: - print "Song is %s (song ID = %d) identification took %f seconds" % (songname, song_id, elapsed) - - # return match info - song = { - "song_id" : song_id, - "song_name" : songname, - "match_time" : elapsed, - "confidence" : largest_count - } - - if record_seconds: - song['record_time'] = record_seconds - - return song diff --git a/go.py b/go.py old mode 100644 new mode 100755 index 2aaee89..9108f4e --- a/go.py +++ b/go.py @@ -1,4 +1,4 @@ -from dejavu.control import Dejavu +from dejavu import Dejavu from ConfigParser import ConfigParser import warnings warnings.filterwarnings("ignore") From 3bc507d8e42f84e395209fbd6d7504e8ef799080 Mon Sep 17 00:00:00 2001 From: Vin Date: Tue, 17 Dec 2013 00:48:49 +0000 Subject: [PATCH 03/10] Changed insert_hashes to fit the new data format --- dejavu/database.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) mode change 100644 => 100755 dejavu/database.py diff --git a/dejavu/database.py b/dejavu/database.py old mode 100644 new mode 100755 index 018ddd4..78b1237 --- a/dejavu/database.py +++ b/dejavu/database.py @@ -260,14 +260,13 @@ class SQLDatabase(Database): """ return self.query(None) - def insert_hashes(self, hashes): + def insert_hashes(self, sid, hashes): """ Insert series of hash => song_id, offset values into the database. """ - # TODO: Fix this when hashes will be a new format. values = [] - for hash, (sid, offset) in hashes: + for hash, offset in hashes: values.append((hash, sid, offset)) with self.cursor() as cur: From 25bf97e813fc28001e0a45d4768608f81d036bb2 Mon Sep 17 00:00:00 2001 From: Vin Date: Tue, 17 Dec 2013 16:37:06 +0000 Subject: [PATCH 04/10] Changed a method to fit the new hash tuples --- dejavu/database.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/dejavu/database.py b/dejavu/database.py index a08bfcd..5d0222f 100755 --- a/dejavu/database.py +++ b/dejavu/database.py @@ -277,15 +277,11 @@ class SQLDatabase(Database): def return_matches(self, hashes): """ Return the (song_id, offset_diff) tuples associated with - a list of - - sha1 => (None, sample_offset) - - values. + a list of (sha1, sample_offset) values. """ # Create a dictionary of hash => offset pairs for later lookups mapper = {} - for hash, (_, offset) in hashes: + for hash, offset in hashes: mapper[hash.upper()] = offset # Get an iteratable of all the hashes we need From f02ab9419255b4d7dcf6705f0c77f1e303327d57 Mon Sep 17 00:00:00 2001 From: Vin Date: Tue, 17 Dec 2013 20:18:55 +0000 Subject: [PATCH 05/10] Cleaned up Dejavu class calls (find_matches, align_matches) --- dejavu/__init__.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/dejavu/__init__.py b/dejavu/__init__.py index b44897d..33414f7 100755 --- a/dejavu/__init__.py +++ b/dejavu/__init__.py @@ -118,11 +118,22 @@ class Dejavu(): channels.append(frames[:, channel]) return (channels, Fs) - def match(self, samples, Fs=fingerprint.DEFAULT_FS): + def fingerprint(self, filepath, song_name=None): + # TODO: replace with something that handles all audio formats + channels, Fs = self.extract_channels(path) + if not song_name: + song_name = os.path.basename(filename).split(".")[0] + song_id = self.db.insert_song(song_name) + + for data in channels: + hashes = fingerprint.fingerprint(data, Fs=Fs) + self.db.insert_hashes(song_id, hashes) + + def find_matches(self, samples, Fs=fingerprint.DEFAULT_FS): hashes = fingerprint.fingerprint(samples, Fs=Fs) return self.db.return_matches(hashes) - def align_matches(self, matches, starttime, record_seconds=None): + def align_matches(self, matches): """ Finds hash matches that align in time with other matches and finds consensus about which hashes are "true" signal from the audio. @@ -156,8 +167,6 @@ class Dejavu(): songname = song.get(SQLDatabase.FIELD_SONGNAME, None) else: return None - songname = songname.replace("_", " ") - elapsed = time.time() - starttime if DEBUG: print("Song is %s (song ID = %d) identification took %f seconds" % (songname, song_id, elapsed)) @@ -166,11 +175,7 @@ class Dejavu(): song = { "song_id" : song_id, "song_name" : songname, - "match_time" : elapsed, "confidence" : largest_count } - - if record_seconds: - song['record_time'] = record_seconds return song \ No newline at end of file From 371742a3140aeda6cba66c92e0962ccef2e4568f Mon Sep 17 00:00:00 2001 From: Vin Date: Tue, 17 Dec 2013 20:55:20 +0000 Subject: [PATCH 06/10] Began moving recognizer functionality into separate classes --- dejavu/recognize.py | 66 ++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 18 deletions(-) mode change 100644 => 100755 dejavu/recognize.py diff --git a/dejavu/recognize.py b/dejavu/recognize.py old mode 100644 new mode 100755 index bcb5d3c..f6a5de5 --- a/dejavu/recognize.py +++ b/dejavu/recognize.py @@ -1,5 +1,7 @@ from multiprocessing import Queue, Process from dejavu.database import SQLDatabase +import dejavu.fingerprint +from dejavu import Dejavu from scipy.io import wavfile import wave import numpy as np @@ -8,6 +10,52 @@ import sys import time import array + +class BaseRecognizer(object): + + def __init__(self, dejavu): + self.dejavu = dejavu + self.Fs = dejavu.fingerprint.DEFAULT_FS + + def recognize(self, *data): + matches = [] + for d in data: + matches.extend(self.dejavu.find_matches(data, Fs=self.Fs)) + return self.dejavu.align_matches(matches) + + +class WaveFileRecognizer(BaseRecognizer): + + def __init__(self, dejavu): + super(BaseRecognizer, self).__init__(dejavu) + + def recognize_file(self, filepath): + Fs, frames = wavfile.read(filename) + self.Fs = Fs + + wave_object = wave.open(filename) + nchannels, sampwidth, framerate, num_frames, comptype, compname = wave_object.getparams() + + channels = [] + for channel in range(nchannels): + channels.append(frames[:, channel]) + + t = time.time() + match = self.recognize(*channels) + t = time.time() - t + + if match: + match['match_time'] = t + + return match + + +class MicrophoneRecognizer(BaseRecognizer): + pass + + + + class Recognizer(object): CHUNK = 8192 # 44100 is a multiple of 1225 @@ -20,24 +68,6 @@ class Recognizer(object): self.fingerprinter = fingerprinter self.config = config self.audio = pyaudio.PyAudio() - - def read(self, filename, verbose=False): - - # read file into channels - channels = [] - Fs, frames = wavfile.read(filename) - wave_object = wave.open(filename) - nchannels, sampwidth, framerate, num_frames, comptype, compname = wave_object.getparams() - for channel in range(nchannels): - channels.append(frames[:, channel]) - - # get matches - starttime = time.time() - matches = [] - for channel in channels: - matches.extend(self.fingerprinter.match(channel)) - - return self.fingerprinter.align_matches(matches, starttime, verbose=verbose) def listen(self, seconds=10, verbose=False): From d25c564d4f82f8e3c7c8fa023d05f3b9fd3ed791 Mon Sep 17 00:00:00 2001 From: Vin Date: Tue, 17 Dec 2013 21:57:59 +0000 Subject: [PATCH 07/10] Finished the changes to the recognize module - Recognizers now use a class structure - Generic code for matching is in BaseRecognizer - Two recognizers are available: - Wave file recognizer - Recording recognizer --- dejavu/recognize.py | 121 ++++++++++++++++++++++++++------------------ 1 file changed, 72 insertions(+), 49 deletions(-) diff --git a/dejavu/recognize.py b/dejavu/recognize.py index f6a5de5..6cc1bd4 100755 --- a/dejavu/recognize.py +++ b/dejavu/recognize.py @@ -10,6 +10,10 @@ import sys import time import array +CHUNK = 8192 # 44100 is a multiple of 1225 +FORMAT = pyaudio.paInt16 +CHANNELS = 2 +RATE = 44100 class BaseRecognizer(object): @@ -17,19 +21,25 @@ class BaseRecognizer(object): self.dejavu = dejavu self.Fs = dejavu.fingerprint.DEFAULT_FS - def recognize(self, *data): + def _recognize(self, *data): matches = [] for d in data: matches.extend(self.dejavu.find_matches(data, Fs=self.Fs)) return self.dejavu.align_matches(matches) + + def recognize(self): + pass # base class does nothing + + class WaveFileRecognizer(BaseRecognizer): - def __init__(self, dejavu): + def __init__(self, dejavu, filename=None): super(BaseRecognizer, self).__init__(dejavu) + self.filename = filename - def recognize_file(self, filepath): + def recognize_file(self, filename): Fs, frames = wavfile.read(filename) self.Fs = Fs @@ -41,62 +51,75 @@ class WaveFileRecognizer(BaseRecognizer): channels.append(frames[:, channel]) t = time.time() - match = self.recognize(*channels) + match = self._recognize(*channels) t = time.time() - t if match: match['match_time'] = t return match + + def recognize(self): + return self.recognize_file(self.filename) class MicrophoneRecognizer(BaseRecognizer): - pass - - - -class Recognizer(object): - - CHUNK = 8192 # 44100 is a multiple of 1225 - FORMAT = pyaudio.paInt16 - CHANNELS = 2 - RATE = 44100 - - def __init__(self, fingerprinter, config): - - self.fingerprinter = fingerprinter - self.config = config + def __init__(self, dejavu, seconds=None) + super(BaseRecognizer, self).__init__(dejavu) self.audio = pyaudio.PyAudio() + self.stream = None + self.data = [] + self.channels = CHANNELS + self.chunk_size = CHUNK + self.rate = RATE + self.recorded = False - def listen(self, seconds=10, verbose=False): + def start_recording(self, channels=CHANNELS, rate=RATE, chunk=CHUNK): + self.chunk_size = chunk + self.channels = channels + self.recorded = False + self.rate = rate + + if self.stream: + self.stream.stop_stream() + self.stream.close() + + self.stream = self.audio.open(format=FORMAT, + channels=channels, + rate=rate, + input=True, + frames_per_buffer=chunk) + + self.data = [[] for i in range(channels)] + + def process_recording(self): + data = self.stream.read(self.chunk_size) + nums = np.fromstring(data, np.int16) + for c in range(self.channels): + self.data[c].extend(nums[c::c+1]) + + def stop_recording(self): + self.stream.stop_stream() + self.stream.close() + self.stream = None + self.recorded = True + + def recognize_recording(self): + if not self.recorded: + raise NoRecordingError("Recording was not complete/begun") + return self._recognize(*data) + + def get_recorded_time(self): + return len(self.data[0]) / self.rate + + def recognize(self): + self.start_recording() + for i in range(0, int(self.rate / self.chunk * self.seconds)): + self.process_recording() + self.stop_recording() + return self.recognize_recording() + +class NoRecordingError(Exception): + pass - # open stream - stream = self.audio.open(format=Recognizer.FORMAT, - channels=Recognizer.CHANNELS, - rate=Recognizer.RATE, - input=True, - frames_per_buffer=Recognizer.CHUNK) - - # record - if verbose: print("* recording") - left, right = [], [] - for i in range(0, int(Recognizer.RATE / Recognizer.CHUNK * seconds)): - data = stream.read(Recognizer.CHUNK) - nums = np.fromstring(data, np.int16) - left.extend(nums[1::2]) - right.extend(nums[0::2]) - if verbose: print("* done recording") - - # close and stop the stream - stream.stop_stream() - stream.close() - - # match both channels - starttime = time.time() - matches = [] - matches.extend(self.fingerprinter.match(left)) - matches.extend(self.fingerprinter.match(right)) - - # align and return - return self.fingerprinter.align_matches(matches, starttime, record_seconds=seconds, verbose=verbose) \ No newline at end of file From 788b3acebfd202b23d15bf4a1f984d0b680897d7 Mon Sep 17 00:00:00 2001 From: Vin Date: Tue, 17 Dec 2013 22:03:50 +0000 Subject: [PATCH 08/10] Added missing 'self' --- dejavu/recognize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dejavu/recognize.py b/dejavu/recognize.py index 6cc1bd4..d743b16 100755 --- a/dejavu/recognize.py +++ b/dejavu/recognize.py @@ -108,7 +108,7 @@ class MicrophoneRecognizer(BaseRecognizer): def recognize_recording(self): if not self.recorded: raise NoRecordingError("Recording was not complete/begun") - return self._recognize(*data) + return self._recognize(*self.data) def get_recorded_time(self): return len(self.data[0]) / self.rate From 614ad0d4cc1a2d92d3540aa73e39435e07638afc Mon Sep 17 00:00:00 2001 From: Vin Date: Tue, 17 Dec 2013 22:05:09 +0000 Subject: [PATCH 09/10] Moved mic recording constants inside MicrophoneRecognizer --- dejavu/recognize.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dejavu/recognize.py b/dejavu/recognize.py index d743b16..024e62f 100755 --- a/dejavu/recognize.py +++ b/dejavu/recognize.py @@ -10,10 +10,6 @@ import sys import time import array -CHUNK = 8192 # 44100 is a multiple of 1225 -FORMAT = pyaudio.paInt16 -CHANNELS = 2 -RATE = 44100 class BaseRecognizer(object): @@ -65,6 +61,11 @@ class WaveFileRecognizer(BaseRecognizer): class MicrophoneRecognizer(BaseRecognizer): + CHUNK = 8192 # 44100 is a multiple of 1225 + FORMAT = pyaudio.paInt16 + CHANNELS = 2 + RATE = 44100 + def __init__(self, dejavu, seconds=None) super(BaseRecognizer, self).__init__(dejavu) self.audio = pyaudio.PyAudio() From 1fcff7e2c5f68d0c672e1610ac9290459387d80e Mon Sep 17 00:00:00 2001 From: Vin Date: Tue, 17 Dec 2013 22:12:21 +0000 Subject: [PATCH 10/10] Fixed some import/class mistakes --- dejavu/__init__.py | 6 +++--- dejavu/recognize.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dejavu/__init__.py b/dejavu/__init__.py index 33414f7..340db94 100755 --- a/dejavu/__init__.py +++ b/dejavu/__init__.py @@ -1,6 +1,6 @@ from dejavu.database import SQLDatabase from dejavu.convert import Converter -import dejavu.fingerprint as fingerprint +import fingerprint from scipy.io import wavfile from multiprocessing import Process import wave, os @@ -44,7 +44,7 @@ class Dejavu(): """ return [lst[i::n] for i in xrange(n)] - def fingerprint(self, path, output, extensions, nprocesses): + def do_fingerprint(self, path, output, extensions, nprocesses): # convert files, shuffle order files = self.converter.find_files(path, extensions) @@ -118,7 +118,7 @@ class Dejavu(): channels.append(frames[:, channel]) return (channels, Fs) - def fingerprint(self, filepath, song_name=None): + def fingerprint_file(self, filepath, song_name=None): # TODO: replace with something that handles all audio formats channels, Fs = self.extract_channels(path) if not song_name: diff --git a/dejavu/recognize.py b/dejavu/recognize.py index 024e62f..68700fd 100755 --- a/dejavu/recognize.py +++ b/dejavu/recognize.py @@ -32,7 +32,7 @@ class BaseRecognizer(object): class WaveFileRecognizer(BaseRecognizer): def __init__(self, dejavu, filename=None): - super(BaseRecognizer, self).__init__(dejavu) + super(WaveFileRecognizer, self).__init__(dejavu) self.filename = filename def recognize_file(self, filename): @@ -66,8 +66,8 @@ class MicrophoneRecognizer(BaseRecognizer): CHANNELS = 2 RATE = 44100 - def __init__(self, dejavu, seconds=None) - super(BaseRecognizer, self).__init__(dejavu) + def __init__(self, dejavu, seconds=None): + super(MicrophoneRecognizer, self).__init__(dejavu) self.audio = pyaudio.PyAudio() self.stream = None self.data = []