Migrated code to Python 3.6.6 and refactored some code to improve it.

This commit is contained in:
mrepetto 2019-09-18 12:39:59 -03:00
parent d2b8761eb3
commit 78dfef04d3
18 changed files with 682 additions and 661 deletions

View file

@ -2,7 +2,7 @@
"database": { "database": {
"host": "127.0.0.1", "host": "127.0.0.1",
"user": "root", "user": "root",
"passwd": "12345678", "password": "rootpass",
"db": "dejavu" "database": "dejavu"
} }
} }

View file

@ -24,7 +24,7 @@ def init(configpath):
with open(configpath) as f: with open(configpath) as f:
config = json.load(f) config = json.load(f)
except IOError as err: except IOError as err:
print("Cannot open configuration: %s. Exiting" % (str(err))) print(("Cannot open configuration: %s. Exiting" % (str(err))))
sys.exit(1) sys.exit(1)
# create a Dejavu instance # create a Dejavu instance
@ -67,8 +67,8 @@ if __name__ == '__main__':
if len(args.fingerprint) == 2: if len(args.fingerprint) == 2:
directory = args.fingerprint[0] directory = args.fingerprint[0]
extension = args.fingerprint[1] extension = args.fingerprint[1]
print("Fingerprinting all .%s files in the %s directory" print(("Fingerprinting all .%s files in the %s directory"
% (extension, directory)) % (extension, directory)))
djv.fingerprint_directory(directory, ["." + extension], 4) djv.fingerprint_directory(directory, ["." + extension], 4)
elif len(args.fingerprint) == 1: elif len(args.fingerprint) == 1:

View file

@ -1,28 +1,23 @@
from dejavu.database import get_database, Database
import dejavu.decoder as decoder
import fingerprint
import multiprocessing import multiprocessing
import os import os
import traceback
import sys import sys
import traceback
import dejavu.decoder as decoder
from dejavu.config.config import (CONFIDENCE, DEFAULT_FS,
DEFAULT_OVERLAP_RATIO, DEFAULT_WINDOW_SIZE,
FIELD_FILE_SHA1, OFFSET, OFFSET_SECS,
SONG_ID, SONG_NAME, TOPN)
from dejavu.database import get_database
from dejavu.fingerprint import fingerprint
class Dejavu(object): class Dejavu:
SONG_ID = "song_id"
SONG_NAME = 'song_name'
CONFIDENCE = 'confidence'
MATCH_TIME = 'match_time'
OFFSET = 'offset'
OFFSET_SECS = 'offset_seconds'
def __init__(self, config): def __init__(self, config):
super(Dejavu, self).__init__()
self.config = config self.config = config
# initialize db # initialize db
db_cls = get_database(config.get("database_type", None)) db_cls = get_database(config.get("database_type", "mysql").lower())
self.db = db_cls(**config.get("database", {})) self.db = db_cls(**config.get("database", {}))
self.db.setup() self.db.setup()
@ -39,7 +34,7 @@ class Dejavu(object):
self.songs = self.db.get_songs() self.songs = self.db.get_songs()
self.songhashes_set = set() # to know which ones we've computed before self.songhashes_set = set() # to know which ones we've computed before
for song in self.songs: for song in self.songs:
song_hash = song[Database.FIELD_FILE_SHA1] song_hash = song[FIELD_FILE_SHA1]
self.songhashes_set.add(song_hash) self.songhashes_set.add(song_hash)
def fingerprint_directory(self, path, extensions, nprocesses=None): def fingerprint_directory(self, path, extensions, nprocesses=None):
@ -55,26 +50,23 @@ class Dejavu(object):
filenames_to_fingerprint = [] filenames_to_fingerprint = []
for filename, _ in decoder.find_files(path, extensions): for filename, _ in decoder.find_files(path, extensions):
# don't refingerprint already fingerprinted files # don't refingerprint already fingerprinted files
if decoder.unique_hash(filename) in self.songhashes_set: if decoder.unique_hash(filename) in self.songhashes_set:
print "%s already fingerprinted, continuing..." % filename print(f"{filename} already fingerprinted, continuing...")
continue continue
filenames_to_fingerprint.append(filename) filenames_to_fingerprint.append(filename)
# Prepare _fingerprint_worker input # Prepare _fingerprint_worker input
worker_input = zip(filenames_to_fingerprint, worker_input = list(zip(filenames_to_fingerprint, [self.limit] * len(filenames_to_fingerprint)))
[self.limit] * len(filenames_to_fingerprint))
# Send off our tasks # Send off our tasks
iterator = pool.imap_unordered(_fingerprint_worker, iterator = pool.imap_unordered(_fingerprint_worker, worker_input)
worker_input)
# Loop till we have all of them # Loop till we have all of them
while True: while True:
try: try:
song_name, hashes, file_hash = iterator.next() song_name, hashes, file_hash = next(iterator)
except multiprocessing.TimeoutError: except multiprocessing.TimeoutError:
continue continue
except StopIteration: except StopIteration:
@ -99,7 +91,7 @@ class Dejavu(object):
song_name = song_name or songname song_name = song_name or songname
# don't refingerprint already fingerprinted files # don't refingerprint already fingerprinted files
if song_hash in self.songhashes_set: if song_hash in self.songhashes_set:
print "%s already fingerprinted, continuing..." % song_name print(f"{song_name} already fingerprinted, continuing...")
else: else:
song_name, hashes, file_hash = _fingerprint_worker( song_name, hashes, file_hash = _fingerprint_worker(
filepath, filepath,
@ -112,22 +104,21 @@ class Dejavu(object):
self.db.set_song_fingerprinted(sid) self.db.set_song_fingerprinted(sid)
self.get_fingerprinted_songs() self.get_fingerprinted_songs()
def find_matches(self, samples, Fs=fingerprint.DEFAULT_FS): def find_matches(self, samples, Fs=DEFAULT_FS):
hashes = fingerprint.fingerprint(samples, Fs=Fs) hashes = fingerprint(samples, Fs=Fs)
return self.db.return_matches(hashes) return self.db.return_matches(hashes)
def align_matches(self, matches): def align_matches(self, matches, topn=TOPN):
""" """
Finds hash matches that align in time with other matches and finds Finds hash matches that align in time with other matches and finds
consensus about which hashes are "true" signal from the audio. consensus about which hashes are "true" signal from the audio.
Returns a dictionary with match information. Returns a list of dictionaries (based on topn) with match information.
""" """
# align by diffs # align by diffs
diff_counter = {} diff_counter = {}
largest = 0
largest_count = 0 largest_count = 0
song_id = -1
for tup in matches: for tup in matches:
sid, diff = tup sid, diff = tup
if diff not in diff_counter: if diff not in diff_counter:
@ -137,30 +128,65 @@ class Dejavu(object):
diff_counter[diff][sid] += 1 diff_counter[diff][sid] += 1
if diff_counter[diff][sid] > largest_count: if diff_counter[diff][sid] > largest_count:
largest = diff
largest_count = diff_counter[diff][sid] largest_count = diff_counter[diff][sid]
song_id = sid
# extract idenfication # create dic where key are songs ids
song = self.db.get_song_by_id(song_id) songs_num_matches = {}
if song: for dc in diff_counter:
# TODO: Clarify what `get_song_by_id` should return. for sid in diff_counter[dc]:
songname = song.get(Dejavu.SONG_NAME, None) match_val = diff_counter[dc][sid]
else: if (sid not in songs_num_matches) or (match_val > songs_num_matches[sid]['value']):
return None songs_num_matches[sid] = {
'sid': sid,
'value': match_val,
'largest': dc
}
# return match info # use dicc of songs to create an ordered (descending) list using the match value property assigned to each song
nseconds = round(float(largest) / fingerprint.DEFAULT_FS * songs_num_matches_list = []
fingerprint.DEFAULT_WINDOW_SIZE * for s in songs_num_matches:
fingerprint.DEFAULT_OVERLAP_RATIO, 5) songs_num_matches_list.append({
song = { 'sid': s,
Dejavu.SONG_ID : song_id, 'object': songs_num_matches[s]
Dejavu.SONG_NAME : songname.encode("utf8"), })
Dejavu.CONFIDENCE : largest_count,
Dejavu.OFFSET : int(largest), songs_num_matches_list_ordered = sorted(songs_num_matches_list, key=lambda x: x['object']['value'],
Dejavu.OFFSET_SECS : nseconds, reverse=True)
Database.FIELD_FILE_SHA1 : song.get(Database.FIELD_FILE_SHA1, None).encode("utf8"),}
return song # iterate the ordered list and fill results
songs_result = []
for s in songs_num_matches_list_ordered:
# get expected variable by the original code
song_id = s['object']['sid']
largest = s['object']['largest']
largest_count = s['object']['value']
# extract identification
song = self.db.get_song_by_id(song_id)
if song:
# TODO: Clarify what `get_song_by_id` should return.
songname = song.get(SONG_NAME, None)
# return match info
nseconds = round(float(largest) / DEFAULT_FS *
DEFAULT_WINDOW_SIZE *
DEFAULT_OVERLAP_RATIO, 5)
song = {
SONG_ID: song_id,
SONG_NAME: songname.encode("utf8"),
CONFIDENCE: largest_count,
OFFSET: int(largest),
OFFSET_SECS: nseconds,
FIELD_FILE_SHA1: song.get(FIELD_FILE_SHA1, None).encode("utf8")
}
songs_result.append(song)
# only consider up to topn elements in the result
if len(songs_result) > topn:
break
return songs_result
def recognize(self, recognizer, *options, **kwoptions): def recognize(self, recognizer, *options, **kwoptions):
r = recognizer(self) r = recognizer(self)
@ -177,26 +203,15 @@ def _fingerprint_worker(filename, limit=None, song_name=None):
songname, extension = os.path.splitext(os.path.basename(filename)) songname, extension = os.path.splitext(os.path.basename(filename))
song_name = song_name or songname song_name = song_name or songname
channels, Fs, file_hash = decoder.read(filename, limit) channels, fs, file_hash = decoder.read(filename, limit)
result = set() result = set()
channel_amount = len(channels) channel_amount = len(channels)
for channeln, channel in enumerate(channels): for channeln, channel in enumerate(channels):
# TODO: Remove prints or change them into optional logging. # TODO: Remove prints or change them into optional logging.
print("Fingerprinting channel %d/%d for %s" % (channeln + 1, print(f"Fingerprinting channel {channeln + 1}/{channel_amount} for {filename}")
channel_amount, hashes = fingerprint(channel, Fs=fs)
filename)) print(f"Finished channel {channeln + 1}/{channel_amount} for {filename}")
hashes = fingerprint.fingerprint(channel, Fs=Fs)
print("Finished channel %d/%d for %s" % (channeln + 1, channel_amount,
filename))
result |= set(hashes) result |= set(hashes)
return song_name, result, file_hash return song_name, result, file_hash
def chunkify(lst, n):
"""
Splits a list into roughly n equal parts.
http://stackoverflow.com/questions/2130016/splitting-a-list-of-arbitrary-size-into-only-roughly-n-equal-parts
"""
return [lst[i::n] for i in xrange(n)]

74
dejavu/config/config.py Normal file
View file

@ -0,0 +1,74 @@
# Dejavu
SONG_ID = "song_id"
SONG_NAME = 'song_name'
CONFIDENCE = 'confidence'
MATCH_TIME = 'match_time'
OFFSET = 'offset'
OFFSET_SECS = 'offset_seconds'
# DATABASE CLASS INSTANCES:
DATABASES = {
'mysql': ("dejavu.database_handler.mysql_database", "MySQLDatabase")
}
# TABLE SONGS
SONGS_TABLENAME = "songs"
# SONGS FIELDS
FIELD_SONG_ID = 'song_id'
FIELD_SONGNAME = 'song_name'
FIELD_FINGERPRINTED = "fingerprinted"
FIELD_FILE_SHA1 = 'file_sha1'
# TABLE FINGERPRINTS
FINGERPRINTS_TABLENAME = "fingerprints"
# FINGERPRINTS FIELDS
FIELD_HASH = 'hash'
FIELD_OFFSET = 'offset'
# FINGERPRINTS CONFIG:
# Sampling rate, related to the Nyquist conditions, which affects
# the range frequencies we can detect.
DEFAULT_FS = 44100
# Size of the FFT window, affects frequency granularity
DEFAULT_WINDOW_SIZE = 4096
# Ratio by which each sequential window overlaps the last and the
# next window. Higher overlap will allow a higher granularity of offset
# matching, but potentially more fingerprints.
DEFAULT_OVERLAP_RATIO = 0.5
# Degree to which a fingerprint can be paired with its neighbors --
# higher will cause more fingerprints, but potentially better accuracy.
DEFAULT_FAN_VALUE = 15
# Minimum amplitude in spectrogram in order to be considered a peak.
# This can be raised to reduce number of fingerprints, but can negatively
# affect accuracy.
DEFAULT_AMP_MIN = 10
# Number of cells around an amplitude peak in the spectrogram in order
# for Dejavu to consider it a spectral peak. Higher values mean less
# fingerprints and faster matching, but can potentially affect accuracy.
PEAK_NEIGHBORHOOD_SIZE = 20
# Thresholds on how close or far fingerprints can be in time in order
# to be paired as a fingerprint. If your max is too low, higher values of
# DEFAULT_FAN_VALUE may not perform as expected.
MIN_HASH_TIME_DELTA = 0
MAX_HASH_TIME_DELTA = 200
# If True, will sort peaks temporally for fingerprinting;
# not sorting will cut down number of fingerprints, but potentially
# affect performance.
PEAK_SORT = True
# Number of bits to grab from the front of the SHA1 hash in the
# fingerprint calculation. The more you grab, the more memory storage,
# with potentially lesser collisions of matches.
FINGERPRINT_REDUCTION = 20
# Number of results being returned for file recognition
TOPN = 2

View file

@ -1,22 +1,15 @@
from __future__ import absolute_import
import abc import abc
import importlib
from dejavu.config.config import DATABASES
class Database(object): class Database(object, metaclass=abc.ABCMeta):
__metaclass__ = abc.ABCMeta
FIELD_FILE_SHA1 = 'file_sha1'
FIELD_SONG_ID = 'song_id'
FIELD_SONGNAME = 'song_name'
FIELD_OFFSET = 'offset'
FIELD_HASH = 'hash'
# Name of your Database subclass, this is used in configuration # Name of your Database subclass, this is used in configuration
# to refer to your class # to refer to your class
type = None type = None
def __init__(self): def __init__(self):
super(Database, self).__init__() super().__init__()
def before_fork(self): def before_fork(self):
""" """
@ -159,18 +152,11 @@ class Database(object):
pass pass
def get_database(database_type=None): def get_database(database_type="mysql"):
# Default to using the mysql database path, db_class_name = DATABASES[database_type]
database_type = database_type or "mysql" try:
# Lower all the input. db_module = importlib.import_module(path)
database_type = database_type.lower() db_class = getattr(db_module, db_class_name)
return db_class
for db_cls in Database.__subclasses__(): except ImportError:
if db_cls.type == database_type: raise TypeError("Unsupported database type supplied.")
return db_cls
raise TypeError("Unsupported database type supplied.")
# Import our default database handler
import dejavu.database_sql

View file

View file

@ -0,0 +1,235 @@
import queue
import mysql.connector
from mysql.connector.errors import DatabaseError
import dejavu.database_handler.mysql_queries as queries
from dejavu.database import Database
class MySQLDatabase(Database):
type = "mysql"
def __init__(self, **options):
super().__init__()
self.cursor = cursor_factory(**options)
self._options = options
def after_fork(self):
# Clear the cursor cache, we don't want any stale connections from
# the previous process.
Cursor.clear_cache()
def setup(self):
"""
Creates any non-existing tables required for dejavu to function.
This also removes all songs that have been added but have no
fingerprints associated with them.
"""
with self.cursor() as cur:
cur.execute(queries.CREATE_SONGS_TABLE)
cur.execute(queries.CREATE_FINGERPRINTS_TABLE)
cur.execute(queries.DELETE_UNFINGERPRINTED)
def empty(self):
"""
Drops tables created by dejavu and then creates them again
by calling `SQLDatabase.setup`.
.. warning:
This will result in a loss of data
"""
with self.cursor() as cur:
cur.execute(queries.DROP_FINGERPRINTS)
cur.execute(queries.DROP_SONGS)
self.setup()
def delete_unfingerprinted_songs(self):
"""
Removes all songs that have no fingerprints associated with them.
"""
with self.cursor() as cur:
cur.execute(queries.DELETE_UNFINGERPRINTED)
def get_num_songs(self):
"""
Returns number of songs the database has fingerprinted.
"""
with self.cursor() as cur:
cur.execute(queries.SELECT_UNIQUE_SONG_IDS)
count = cur.fetchone()[0] if cur.rowcount != 0 else 0
return count
def get_num_fingerprints(self):
"""
Returns number of fingerprints the database has fingerprinted.
"""
with self.cursor() as cur:
cur.execute(queries.SELECT_NUM_FINGERPRINTS)
count = cur.fetchone()[0] if cur.rowcount != 0 else 0
cur.close()
return count
def set_song_fingerprinted(self, sid):
"""
Set the fingerprinted flag to TRUE (1) once a song has been completely
fingerprinted in the database.
"""
with self.cursor() as cur:
cur.execute(queries.UPDATE_SONG_FINGERPRINTED, (sid,))
def get_songs(self):
"""
Return songs that have the fingerprinted flag set TRUE (1).
"""
with self.cursor(dictionary=True) as cur:
cur.execute(queries.SELECT_SONGS)
for row in cur:
yield row
def get_song_by_id(self, sid):
"""
Returns song by its ID.
"""
with self.cursor(dictionary=True) as cur:
cur.execute(queries.SELECT_SONG, (sid,))
return cur.fetchone()
def insert(self, hash, sid, offset):
"""
Insert a (sha1, song_id, offset) row into database.
"""
with self.cursor() as cur:
cur.execute(queries.INSERT_FINGERPRINT, (hash, sid, offset))
def insert_song(self, song_name, file_hash):
"""
Inserts song in the database and returns the ID of the inserted record.
"""
with self.cursor() as cur:
cur.execute(queries.INSERT_SONG, (song_name, file_hash))
return cur.lastrowid
def query(self, hash):
"""
Return all tuples associated with hash.
If hash is None, returns all entries in the
database (be careful with that one!).
"""
if hash:
with self.cursor() as cur:
cur.execute(queries.SELECT, (hash,))
for sid, offset in cur:
yield (sid, offset)
else: # select all if no key
with self.cursor() as cur:
cur.execute(queries.SELECT_ALL)
for sid, offset in cur:
yield (sid, offset)
def get_iterable_kv_pairs(self):
"""
Returns all tuples in database.
"""
return self.query(None)
def insert_hashes(self, sid, hashes, batch=1000):
"""
Insert series of hash => song_id, offset
values into the database.
"""
values = [(sid, hash, int(offset)) for hash, offset in hashes]
with self.cursor() as cur:
for index in range(0, len(hashes), batch):
cur.executemany(queries.INSERT_FINGERPRINT, values[index: index + batch])
def return_matches(self, hashes, batch=1000):
"""
Return the (song_id, offset_diff) tuples associated with
a list of (sha1, sample_offset) values.
"""
# Create a dictionary of hash => offset pairs for later lookups
mapper = {}
for hash, offset in hashes:
mapper[hash.upper()] = offset
# Get an iterable of all the hashes we need
values = list(mapper.keys())
with self.cursor() as cur:
for index in range(0, len(values), batch):
# Create our IN part of the query
query = queries.SELECT_MULTIPLE
query = query % ', '.join(['UNHEX(%s)'] * len(values[index: index + batch]))
cur.execute(query, values[index: index + batch])
for hash, sid, offset in cur:
# (sid, db_offset - song_sampled_offset)
yield (sid, offset - mapper[hash])
def __getstate__(self):
return self._options,
def __setstate__(self, state):
self._options, = state
self.cursor = cursor_factory(**self._options)
def cursor_factory(**factory_options):
def cursor(**options):
options.update(factory_options)
return Cursor(**options)
return cursor
class Cursor(object):
"""
Establishes a connection to the database and returns an open cursor.
# Use as context manager
with Cursor() as cur:
cur.execute(query)
...
"""
def __init__(self, dictionary=False, **options):
super().__init__()
self._cache = queue.Queue(maxsize=5)
try:
conn = self._cache.get_nowait()
# Ping the connection before using it from the cache.
conn.ping(True)
except queue.Empty:
conn = mysql.connector.connect(**options)
self.conn = conn
self.dictionary = dictionary
@classmethod
def clear_cache(cls):
cls._cache = queue.Queue(maxsize=5)
def __enter__(self):
self.cursor = self.conn.cursor(dictionary=self.dictionary)
return self.cursor
def __exit__(self, extype, exvalue, traceback):
# if we had a MySQL related error we try to rollback the cursor.
if extype is DatabaseError:
self.cursor.rollback()
self.cursor.close()
self.conn.commit()
# Put it back on the queue
try:
self._cache.put_nowait(self.conn)
except queue.Full:
self.conn.close()

View file

@ -0,0 +1,126 @@
from dejavu.config.config import (FIELD_FILE_SHA1, FIELD_FINGERPRINTED,
FIELD_HASH, FIELD_OFFSET, FIELD_SONG_ID,
FIELD_SONGNAME, FINGERPRINTS_TABLENAME,
SONGS_TABLENAME)
"""
Queries:
1) Find duplicates (shouldn't be any, though):
select `hash`, `song_id`, `offset`, count(*) cnt
from fingerprints
group by `hash`, `song_id`, `offset`
having cnt > 1
order by cnt asc;
2) Get number of hashes by song:
select song_id, song_name, count(song_id) as num
from fingerprints
natural join songs
group by song_id
order by count(song_id) desc;
3) get hashes with highest number of collisions
select
hash,
count(distinct song_id) as n
from fingerprints
group by `hash`
order by n DESC;
=> 26 different songs with same fingerprint (392 times):
select songs.song_name, fingerprints.offset
from fingerprints natural join songs
where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73";
"""
# creates
CREATE_SONGS_TABLE = f"""
CREATE TABLE IF NOT EXISTS `{SONGS_TABLENAME}` (
`{FIELD_SONG_ID}` mediumint unsigned not null auto_increment,
`{FIELD_SONGNAME}` varchar(250) not null,
`{FIELD_FINGERPRINTED}` tinyint default 0,
`{FIELD_FILE_SHA1}` binary(20) not null,
PRIMARY KEY (`{FIELD_SONG_ID}`),
UNIQUE KEY `{FIELD_SONG_ID}` (`{FIELD_SONG_ID}`)
) ENGINE=INNODB;"""
CREATE_FINGERPRINTS_TABLE = f"""
CREATE TABLE IF NOT EXISTS `{FINGERPRINTS_TABLENAME}` (
`{FIELD_HASH}` binary(10) not null,
`{FIELD_SONG_ID}` mediumint unsigned not null,
`{FIELD_OFFSET}` int unsigned not null,
INDEX ({FIELD_HASH}),
UNIQUE KEY `unique_constraint` ({FIELD_SONG_ID}, {FIELD_OFFSET}, {FIELD_HASH}),
FOREIGN KEY ({FIELD_SONG_ID}) REFERENCES {SONGS_TABLENAME}({FIELD_SONG_ID}) ON DELETE CASCADE
) ENGINE=INNODB;"""
# inserts (ignores duplicates)
INSERT_FINGERPRINT = f"""
INSERT IGNORE INTO `{FINGERPRINTS_TABLENAME}` (
`{FIELD_SONG_ID}`
, `{FIELD_HASH}`
, `{FIELD_OFFSET}`)
VALUES (%s, UNHEX(%s), %s);
"""
INSERT_SONG = f"""
INSERT INTO `{SONGS_TABLENAME}` (`{FIELD_SONGNAME}`,`{FIELD_FILE_SHA1}`)
VALUES (%s, UNHEX(%s));
"""
# selects
SELECT = f"""
SELECT `{FIELD_SONG_ID}`, `{FIELD_OFFSET}`
FROM `{FINGERPRINTS_TABLENAME}`
WHERE `{FIELD_HASH}` = UNHEX(%s);
"""
SELECT_MULTIPLE = f"""
SELECT HEX(`{FIELD_HASH}`), `{FIELD_SONG_ID}`, `{FIELD_OFFSET}`
FROM `{FINGERPRINTS_TABLENAME}`
WHERE `{FIELD_HASH}` IN (%s);
"""
SELECT_ALL = f"SELECT `{FIELD_SONG_ID}`, `{FIELD_OFFSET}` FROM `{FINGERPRINTS_TABLENAME}`;"
SELECT_SONG = f"""
SELECT `{FIELD_SONGNAME}`, HEX(`{FIELD_FILE_SHA1}`) AS `{FIELD_FILE_SHA1}`
FROM `{SONGS_TABLENAME}`
WHERE `{FIELD_SONG_ID}` = %s;
"""
SELECT_NUM_FINGERPRINTS = f"SELECT COUNT(*) AS n FROM `{FINGERPRINTS_TABLENAME}`;"
SELECT_UNIQUE_SONG_IDS = f"""
SELECT COUNT(`{FIELD_SONG_ID}`) AS n
FROM `{SONGS_TABLENAME}`
WHERE `{FIELD_FINGERPRINTED}` = 1;
"""
SELECT_SONGS = f"""
SELECT
`{FIELD_SONG_ID}`
, `{FIELD_SONGNAME}`
, HEX(`{FIELD_FILE_SHA1}`) AS `{FIELD_FILE_SHA1}`
FROM `{SONGS_TABLENAME}`
WHERE `{FIELD_FINGERPRINTED}` = 1;
"""
# drops
DROP_FINGERPRINTS = f"DROP TABLE IF EXISTS `{FINGERPRINTS_TABLENAME}`;"
DROP_SONGS = f"DROP TABLE IF EXISTS `{SONGS_TABLENAME}`;"
# update
UPDATE_SONG_FINGERPRINTED = f"""
UPDATE `{SONGS_TABLENAME}` SET `{FIELD_FINGERPRINTED}` = 1 WHERE `{FIELD_SONG_ID}` = %s;
"""
# delete
DELETE_UNFINGERPRINTED = f"""
DELETE FROM `{SONGS_TABLENAME}` WHERE `{FIELD_FINGERPRINTED}` = 0;
"""

View file

@ -1,373 +0,0 @@
from __future__ import absolute_import
from itertools import izip_longest
import Queue
import MySQLdb as mysql
from MySQLdb.cursors import DictCursor
from dejavu.database import Database
class SQLDatabase(Database):
"""
Queries:
1) Find duplicates (shouldn't be any, though):
select `hash`, `song_id`, `offset`, count(*) cnt
from fingerprints
group by `hash`, `song_id`, `offset`
having cnt > 1
order by cnt asc;
2) Get number of hashes by song:
select song_id, song_name, count(song_id) as num
from fingerprints
natural join songs
group by song_id
order by count(song_id) desc;
3) get hashes with highest number of collisions
select
hash,
count(distinct song_id) as n
from fingerprints
group by `hash`
order by n DESC;
=> 26 different songs with same fingerprint (392 times):
select songs.song_name, fingerprints.offset
from fingerprints natural join songs
where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73";
"""
type = "mysql"
# tables
FINGERPRINTS_TABLENAME = "fingerprints"
SONGS_TABLENAME = "songs"
# fields
FIELD_FINGERPRINTED = "fingerprinted"
# creates
CREATE_FINGERPRINTS_TABLE = """
CREATE TABLE IF NOT EXISTS `%s` (
`%s` binary(10) not null,
`%s` mediumint unsigned not null,
`%s` int unsigned not null,
INDEX (%s),
UNIQUE KEY `unique_constraint` (%s, %s, %s),
FOREIGN KEY (%s) REFERENCES %s(%s) ON DELETE CASCADE
) ENGINE=INNODB;""" % (
FINGERPRINTS_TABLENAME, Database.FIELD_HASH,
Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
Database.FIELD_SONG_ID, SONGS_TABLENAME, Database.FIELD_SONG_ID
)
CREATE_SONGS_TABLE = """
CREATE TABLE IF NOT EXISTS `%s` (
`%s` mediumint unsigned not null auto_increment,
`%s` varchar(250) not null,
`%s` tinyint default 0,
`%s` binary(20) not null,
PRIMARY KEY (`%s`),
UNIQUE KEY `%s` (`%s`)
) ENGINE=INNODB;""" % (
SONGS_TABLENAME, Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, FIELD_FINGERPRINTED,
Database.FIELD_FILE_SHA1,
Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, Database.FIELD_SONG_ID,
)
# inserts (ignores duplicates)
INSERT_FINGERPRINT = """
INSERT IGNORE INTO %s (%s, %s, %s) values
(UNHEX(%%s), %%s, %%s);
""" % (FINGERPRINTS_TABLENAME, Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET)
INSERT_SONG = "INSERT INTO %s (%s, %s) values (%%s, UNHEX(%%s));" % (
SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1)
# selects
SELECT = """
SELECT %s, %s FROM %s WHERE %s = UNHEX(%%s);
""" % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME, Database.FIELD_HASH)
SELECT_MULTIPLE = """
SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s);
""" % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET,
FINGERPRINTS_TABLENAME, Database.FIELD_HASH)
SELECT_ALL = """
SELECT %s, %s FROM %s;
""" % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME)
SELECT_SONG = """
SELECT %s, HEX(%s) as %s FROM %s WHERE %s = %%s;
""" % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, SONGS_TABLENAME, Database.FIELD_SONG_ID)
SELECT_NUM_FINGERPRINTS = """
SELECT COUNT(*) as n FROM %s
""" % (FINGERPRINTS_TABLENAME)
SELECT_UNIQUE_SONG_IDS = """
SELECT COUNT(DISTINCT %s) as n FROM %s WHERE %s = 1;
""" % (Database.FIELD_SONG_ID, SONGS_TABLENAME, FIELD_FINGERPRINTED)
SELECT_SONGS = """
SELECT %s, %s, HEX(%s) as %s FROM %s WHERE %s = 1;
""" % (Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1,
SONGS_TABLENAME, FIELD_FINGERPRINTED)
# drops
DROP_FINGERPRINTS = "DROP TABLE IF EXISTS %s;" % FINGERPRINTS_TABLENAME
DROP_SONGS = "DROP TABLE IF EXISTS %s;" % SONGS_TABLENAME
# update
UPDATE_SONG_FINGERPRINTED = """
UPDATE %s SET %s = 1 WHERE %s = %%s
""" % (SONGS_TABLENAME, FIELD_FINGERPRINTED, Database.FIELD_SONG_ID)
# delete
DELETE_UNFINGERPRINTED = """
DELETE FROM %s WHERE %s = 0;
""" % (SONGS_TABLENAME, FIELD_FINGERPRINTED)
def __init__(self, **options):
super(SQLDatabase, self).__init__()
self.cursor = cursor_factory(**options)
self._options = options
def after_fork(self):
# Clear the cursor cache, we don't want any stale connections from
# the previous process.
Cursor.clear_cache()
def setup(self):
"""
Creates any non-existing tables required for dejavu to function.
This also removes all songs that have been added but have no
fingerprints associated with them.
"""
with self.cursor(charset="utf8") as cur:
cur.execute(self.CREATE_SONGS_TABLE)
cur.execute(self.CREATE_FINGERPRINTS_TABLE)
cur.execute(self.DELETE_UNFINGERPRINTED)
def empty(self):
"""
Drops tables created by dejavu and then creates them again
by calling `SQLDatabase.setup`.
.. warning:
This will result in a loss of data
"""
with self.cursor(charset="utf8") as cur:
cur.execute(self.DROP_FINGERPRINTS)
cur.execute(self.DROP_SONGS)
self.setup()
def delete_unfingerprinted_songs(self):
"""
Removes all songs that have no fingerprints associated with them.
"""
with self.cursor(charset="utf8") as cur:
cur.execute(self.DELETE_UNFINGERPRINTED)
def get_num_songs(self):
"""
Returns number of songs the database has fingerprinted.
"""
with self.cursor(charset="utf8") as cur:
cur.execute(self.SELECT_UNIQUE_SONG_IDS)
for count, in cur:
return count
return 0
def get_num_fingerprints(self):
"""
Returns number of fingerprints the database has fingerprinted.
"""
with self.cursor(charset="utf8") as cur:
cur.execute(self.SELECT_NUM_FINGERPRINTS)
for count, in cur:
return count
return 0
def set_song_fingerprinted(self, sid):
"""
Set the fingerprinted flag to TRUE (1) once a song has been completely
fingerprinted in the database.
"""
with self.cursor(charset="utf8") as cur:
cur.execute(self.UPDATE_SONG_FINGERPRINTED, (sid,))
def get_songs(self):
"""
Return songs that have the fingerprinted flag set TRUE (1).
"""
with self.cursor(cursor_type=DictCursor, charset="utf8") as cur:
cur.execute(self.SELECT_SONGS)
for row in cur:
yield row
def get_song_by_id(self, sid):
"""
Returns song by its ID.
"""
with self.cursor(cursor_type=DictCursor, charset="utf8") as cur:
cur.execute(self.SELECT_SONG, (sid,))
return cur.fetchone()
def insert(self, hash, sid, offset):
"""
Insert a (sha1, song_id, offset) row into database.
"""
with self.cursor(charset="utf8") as cur:
cur.execute(self.INSERT_FINGERPRINT, (hash, sid, offset))
def insert_song(self, songname, file_hash):
"""
Inserts song in the database and returns the ID of the inserted record.
"""
with self.cursor(charset="utf8") as cur:
cur.execute(self.INSERT_SONG, (songname, file_hash))
return cur.lastrowid
def query(self, hash):
"""
Return all tuples associated with hash.
If hash is None, returns all entries in the
database (be careful with that one!).
"""
# select all if no key
query = self.SELECT_ALL if hash is None else self.SELECT
with self.cursor(charset="utf8") as cur:
cur.execute(query)
for sid, offset in cur:
yield (sid, offset)
def get_iterable_kv_pairs(self):
"""
Returns all tuples in database.
"""
return self.query(None)
def insert_hashes(self, sid, hashes):
"""
Insert series of hash => song_id, offset
values into the database.
"""
values = []
for hash, offset in hashes:
values.append((hash, sid, offset))
with self.cursor(charset="utf8") as cur:
for split_values in grouper(values, 1000):
cur.executemany(self.INSERT_FINGERPRINT, split_values)
def return_matches(self, hashes):
"""
Return the (song_id, offset_diff) tuples associated with
a list of (sha1, sample_offset) values.
"""
# Create a dictionary of hash => offset pairs for later lookups
mapper = {}
for hash, offset in hashes:
mapper[hash.upper()] = offset
# Get an iteratable of all the hashes we need
values = mapper.keys()
with self.cursor(charset="utf8") as cur:
for split_values in grouper(values, 1000):
# Create our IN part of the query
query = self.SELECT_MULTIPLE
query = query % ', '.join(['UNHEX(%s)'] * len(split_values))
cur.execute(query, split_values)
for hash, sid, offset in cur:
# (sid, db_offset - song_sampled_offset)
yield (sid, offset - mapper[hash])
def __getstate__(self):
return (self._options,)
def __setstate__(self, state):
self._options, = state
self.cursor = cursor_factory(**self._options)
def grouper(iterable, n, fillvalue=None):
args = [iter(iterable)] * n
return (filter(None, values) for values
in izip_longest(fillvalue=fillvalue, *args))
def cursor_factory(**factory_options):
def cursor(**options):
options.update(factory_options)
return Cursor(**options)
return cursor
class Cursor(object):
    """
    Establishes a connection to the database and returns an open cursor.

    Connections are pooled in a small class-level queue so they can be
    reused across Cursor instances instead of reconnecting every time.

    ```python
    # Use as context manager
    with Cursor() as cur:
        cur.execute(query)
    ```
    """
    # Shared across all instances. (A per-instance cache would always be
    # empty on lookup and would leak connections on teardown, since each
    # instance's queue dies with it.)
    _cache = Queue.Queue(maxsize=5)

    def __init__(self, cursor_type=mysql.cursors.Cursor, **options):
        super(Cursor, self).__init__()

        try:
            # Reuse a pooled connection when one is available.
            conn = self._cache.get_nowait()
        except Queue.Empty:
            conn = mysql.connect(**options)
        else:
            # Ping the connection before using it from the cache.
            conn.ping(True)

        self.conn = conn
        self.conn.autocommit(False)
        self.cursor_type = cursor_type

    @classmethod
    def clear_cache(cls):
        """Drop all pooled connections (they are not explicitly closed)."""
        cls._cache = Queue.Queue(maxsize=5)

    def __enter__(self):
        self.cursor = self.conn.cursor(self.cursor_type)
        return self.cursor

    def __exit__(self, extype, exvalue, traceback):
        # On any MySQL-related error (including subclasses), roll back on
        # the *connection* -- cursor objects have no rollback() method.
        if extype is not None and issubclass(extype, mysql.MySQLError):
            self.conn.rollback()

        self.cursor.close()
        self.conn.commit()

        # Return the connection to the pool; close it if the pool is full.
        try:
            self._cache.put_nowait(self.conn)
        except Queue.Full:
            self.conn.close()

View file

@ -3,9 +3,10 @@ import fnmatch
import numpy as np import numpy as np
from pydub import AudioSegment from pydub import AudioSegment
from pydub.utils import audioop from pydub.utils import audioop
import wavio from . import wavio
from hashlib import sha1 from hashlib import sha1
def unique_hash(filepath, blocksize=2**20): def unique_hash(filepath, blocksize=2**20):
""" Small function to generate a hash to uniquely generate """ Small function to generate a hash to uniquely generate
a file. Inspired by MD5 version here: a file. Inspired by MD5 version here:
@ -14,7 +15,7 @@ def unique_hash(filepath, blocksize=2**20):
Works with large files. Works with large files.
""" """
s = sha1() s = sha1()
with open(filepath , "rb") as f: with open(filepath, "rb") as f:
while True: while True:
buf = f.read(blocksize) buf = f.read(blocksize)
if not buf: if not buf:
@ -29,7 +30,7 @@ def find_files(path, extensions):
for dirpath, dirnames, files in os.walk(path): for dirpath, dirnames, files in os.walk(path):
for extension in extensions: for extension in extensions:
for f in fnmatch.filter(files, "*.%s" % extension): for f in fnmatch.filter(files, f"*.{extension}"):
p = os.path.join(dirpath, f) p = os.path.join(dirpath, f)
yield (p, extension) yield (p, extension)
@ -53,15 +54,15 @@ def read(filename, limit=None):
if limit: if limit:
audiofile = audiofile[:limit * 1000] audiofile = audiofile[:limit * 1000]
data = np.fromstring(audiofile._data, np.int16) data = np.fromstring(audiofile.raw_data, np.int16)
channels = [] channels = []
for chn in xrange(audiofile.channels): for chn in range(audiofile.channels):
channels.append(data[chn::audiofile.channels]) channels.append(data[chn::audiofile.channels])
fs = audiofile.frame_rate audiofile.frame_rate
except audioop.error: except audioop.error:
fs, _, audiofile = wavio.readwav(filename) _, _, audiofile = wavio.readwav(filename)
if limit: if limit:
audiofile = audiofile[:limit * 1000] audiofile = audiofile[:limit * 1000]

View file

@ -1,74 +1,32 @@
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from scipy.ndimage.filters import maximum_filter
from scipy.ndimage.morphology import (generate_binary_structure,
iterate_structure, binary_erosion)
import hashlib import hashlib
from operator import itemgetter from operator import itemgetter
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage.filters import maximum_filter
from scipy.ndimage.morphology import (binary_erosion,
generate_binary_structure,
iterate_structure)
from dejavu.config.config import (DEFAULT_AMP_MIN, DEFAULT_FAN_VALUE,
DEFAULT_FS, DEFAULT_OVERLAP_RATIO,
DEFAULT_WINDOW_SIZE, FINGERPRINT_REDUCTION,
MAX_HASH_TIME_DELTA, MIN_HASH_TIME_DELTA,
PEAK_NEIGHBORHOOD_SIZE, PEAK_SORT)
IDX_FREQ_I = 0 IDX_FREQ_I = 0
IDX_TIME_J = 1 IDX_TIME_J = 1
######################################################################
# Sampling rate, related to the Nyquist conditions, which affects
# the range frequencies we can detect.
DEFAULT_FS = 44100
###################################################################### def fingerprint(channel_samples,
# Size of the FFT window, affects frequency granularity Fs=DEFAULT_FS,
DEFAULT_WINDOW_SIZE = 4096
######################################################################
# Ratio by which each sequential window overlaps the last and the
# next window. Higher overlap will allow a higher granularity of offset
# matching, but potentially more fingerprints.
DEFAULT_OVERLAP_RATIO = 0.5
######################################################################
# Degree to which a fingerprint can be paired with its neighbors --
# higher will cause more fingerprints, but potentially better accuracy.
DEFAULT_FAN_VALUE = 15
######################################################################
# Minimum amplitude in spectrogram in order to be considered a peak.
# This can be raised to reduce number of fingerprints, but can negatively
# affect accuracy.
DEFAULT_AMP_MIN = 10
######################################################################
# Number of cells around an amplitude peak in the spectrogram in order
# for Dejavu to consider it a spectral peak. Higher values mean less
# fingerprints and faster matching, but can potentially affect accuracy.
PEAK_NEIGHBORHOOD_SIZE = 20
######################################################################
# Thresholds on how close or far fingerprints can be in time in order
# to be paired as a fingerprint. If your max is too low, higher values of
# DEFAULT_FAN_VALUE may not perform as expected.
MIN_HASH_TIME_DELTA = 0
MAX_HASH_TIME_DELTA = 200
######################################################################
# If True, will sort peaks temporally for fingerprinting;
# not sorting will cut down number of fingerprints, but potentially
# affect performance.
PEAK_SORT = True
######################################################################
# Number of bits to grab from the front of the SHA1 hash in the
# fingerprint calculation. The more you grab, the more memory storage,
# with potentially lesser collisions of matches.
FINGERPRINT_REDUCTION = 20
def fingerprint(channel_samples, Fs=DEFAULT_FS,
wsize=DEFAULT_WINDOW_SIZE, wsize=DEFAULT_WINDOW_SIZE,
wratio=DEFAULT_OVERLAP_RATIO, wratio=DEFAULT_OVERLAP_RATIO,
fan_value=DEFAULT_FAN_VALUE, fan_value=DEFAULT_FAN_VALUE,
amp_min=DEFAULT_AMP_MIN): amp_min=DEFAULT_AMP_MIN):
""" """
FFT the channel, log transform output, find local maxima, then return FFT the channel, log transform output, find local maxima, then return locally sensitive hashes.
locally sensitive hashes.
""" """
# FFT the signal and extract frequency components # FFT the signal and extract frequency components
arr2D = mlab.specgram( arr2D = mlab.specgram(
@ -78,11 +36,9 @@ def fingerprint(channel_samples, Fs=DEFAULT_FS,
window=mlab.window_hanning, window=mlab.window_hanning,
noverlap=int(wsize * wratio))[0] noverlap=int(wsize * wratio))[0]
# apply log transform since specgram() returns linear array # Apply log transform since specgram() returns linear array. 0s are excluded to avoid np warning.
arr2D = 10 * np.log10(arr2D) arr2D = 10 * np.log10(arr2D, out=np.zeros_like(arr2D), where=(arr2D != 0))
arr2D[arr2D == -np.inf] = 0 # replace infs with zeros
# find local maxima
local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min) local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min)
# return hashes # return hashes
@ -97,39 +53,35 @@ def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN):
# find local maxima using our filter shape # find local maxima using our filter shape
local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D
background = (arr2D == 0) background = (arr2D == 0)
eroded_background = binary_erosion(background, structure=neighborhood, eroded_background = binary_erosion(background, structure=neighborhood, border_value=1)
border_value=1)
# Boolean mask of arr2D with True at peaks (Fixed deprecated boolean operator by changing '-' to '^') # Boolean mask of arr2D with True at peaks (Fixed deprecated boolean operator by changing '-' to '^')
detected_peaks = local_max ^ eroded_background detected_peaks = local_max ^ eroded_background
# extract peaks # extract peaks
amps = arr2D[detected_peaks] amps = arr2D[detected_peaks]
j, i = np.where(detected_peaks) freqs, times = np.where(detected_peaks)
# filter peaks # filter peaks
amps = amps.flatten() amps = amps.flatten()
peaks = zip(i, j, amps)
peaks_filtered = filter(lambda x: x[2]>amp_min, peaks) # freq, time, amp
# get indices for frequency and time # get indices for frequency and time
frequency_idx = [] filter_idxs = np.where(amps > amp_min)
time_idx = []
for x in peaks_filtered: freqs_filter = freqs[filter_idxs]
frequency_idx.append(x[1]) times_filter = times[filter_idxs]
time_idx.append(x[0])
if plot: if plot:
# scatter of the peaks # scatter of the peaks
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.imshow(arr2D) ax.imshow(arr2D)
ax.scatter(time_idx, frequency_idx) ax.scatter(times_filter, freqs_filter)
ax.set_xlabel('Time') ax.set_xlabel('Time')
ax.set_ylabel('Frequency') ax.set_ylabel('Frequency')
ax.set_title("Spectrogram") ax.set_title("Spectrogram")
plt.gca().invert_yaxis() plt.gca().invert_yaxis()
plt.show() plt.show()
return zip(frequency_idx, time_idx) return list(zip(freqs_filter, times_filter))
def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE): def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
@ -151,7 +103,6 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
t2 = peaks[i + j][IDX_TIME_J] t2 = peaks[i + j][IDX_TIME_J]
t_delta = t2 - t1 t_delta = t2 - t1
if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA: if MIN_HASH_TIME_DELTA <= t_delta <= MAX_HASH_TIME_DELTA:
h = hashlib.sha1( h = hashlib.sha1(f"{str(freq1)}|{str(freq2)}|{str(t_delta)}".encode('utf-8'))
"%s|%s|%s" % (str(freq1), str(freq2), str(t_delta)))
yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1) yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)

View file

@ -1,16 +1,16 @@
# encoding: utf-8 import time
import dejavu.fingerprint as fingerprint
import dejavu.decoder as decoder
import numpy as np import numpy as np
import pyaudio import pyaudio
import time
import dejavu.decoder as decoder
from dejavu.config.config import DEFAULT_FS
class BaseRecognizer(object): class BaseRecognizer(object):
def __init__(self, dejavu): def __init__(self, dejavu):
self.dejavu = dejavu self.dejavu = dejavu
self.Fs = fingerprint.DEFAULT_FS self.Fs = DEFAULT_FS
def _recognize(self, *data): def _recognize(self, *data):
matches = [] matches = []
@ -24,32 +24,32 @@ class BaseRecognizer(object):
class FileRecognizer(BaseRecognizer): class FileRecognizer(BaseRecognizer):
def __init__(self, dejavu): def __init__(self, dejavu):
super(FileRecognizer, self).__init__(dejavu) super().__init__(dejavu)
def recognize_file(self, filename): def recognize_file(self, filename):
frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit) frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit)
t = time.time() t = time.time()
match = self._recognize(*frames) matches = self._recognize(*frames)
t = time.time() - t t = time.time() - t
if match: for match in matches:
match['match_time'] = t match['match_time'] = t
return match return matches
def recognize(self, filename): def recognize(self, filename):
return self.recognize_file(filename) return self.recognize_file(filename)
class MicrophoneRecognizer(BaseRecognizer): class MicrophoneRecognizer(BaseRecognizer):
default_chunksize = 8192 default_chunksize = 8192
default_format = pyaudio.paInt16 default_format = pyaudio.paInt16
default_channels = 2 default_channels = 2
default_samplerate = 44100 default_samplerate = 44100
def __init__(self, dejavu): def __init__(self, dejavu):
super(MicrophoneRecognizer, self).__init__(dejavu) super().__init__(dejavu)
self.audio = pyaudio.PyAudio() self.audio = pyaudio.PyAudio()
self.stream = None self.stream = None
self.data = [] self.data = []

View file

@ -1,14 +1,19 @@
from __future__ import division
from pydub import AudioSegment import ast
from dejavu.decoder import path_to_songname
from dejavu import Dejavu
from dejavu.fingerprint import *
import traceback
import fnmatch import fnmatch
import os, re, ast
import subprocess
import random
import logging import logging
import os
import random
import re
import subprocess
import traceback
from pydub import AudioSegment
from dejavu import Dejavu
from dejavu.decoder import path_to_songname
from dejavu.fingerprint import *
def set_seed(seed=None): def set_seed(seed=None):
""" """
@ -20,6 +25,7 @@ def set_seed(seed=None):
if seed != None: if seed != None:
random.seed(seed) random.seed(seed)
def get_files_recursive(src, fmt): def get_files_recursive(src, fmt):
""" """
`src` is the source directory. `src` is the source directory.
@ -29,6 +35,7 @@ def get_files_recursive(src, fmt):
for filename in fnmatch.filter(filenames, '*' + fmt): for filename in fnmatch.filter(filenames, '*' + fmt):
yield os.path.join(root, filename) yield os.path.join(root, filename)
def get_length_audio(audiopath, extension): def get_length_audio(audiopath, extension):
""" """
Returns length of audio in seconds. Returns length of audio in seconds.
@ -37,10 +44,11 @@ def get_length_audio(audiopath, extension):
try: try:
audio = AudioSegment.from_file(audiopath, extension.replace(".", "")) audio = AudioSegment.from_file(audiopath, extension.replace(".", ""))
except: except:
print "Error in get_length_audio(): %s" % traceback.format_exc() print(f"Error in get_length_audio(): {traceback.format_exc()}")
return None return None
return int(len(audio) / 1000.0) return int(len(audio) / 1000.0)
def get_starttime(length, nseconds, padding): def get_starttime(length, nseconds, padding):
""" """
`length` is total audio length in seconds `length` is total audio length in seconds
@ -52,6 +60,7 @@ def get_starttime(length, nseconds, padding):
return 0 return 0
return random.randint(padding, maximum) return random.randint(padding, maximum)
def generate_test_files(src, dest, nseconds, fmts=[".mp3", ".wav"], padding=10): def generate_test_files(src, dest, nseconds, fmts=[".mp3", ".wav"], padding=10):
""" """
Generates a test file for each file recursively in `src` directory Generates a test file for each file recursively in `src` directory
@ -75,42 +84,43 @@ def generate_test_files(src, dest, nseconds, fmts=[".mp3", ".wav"], padding=10):
testsources = get_files_recursive(src, fmt) testsources = get_files_recursive(src, fmt)
for audiosource in testsources: for audiosource in testsources:
print "audiosource:", audiosource print("audiosource:", audiosource)
filename, extension = os.path.splitext(os.path.basename(audiosource)) filename, extension = os.path.splitext(os.path.basename(audiosource))
length = get_length_audio(audiosource, extension) length = get_length_audio(audiosource, extension)
starttime = get_starttime(length, nseconds, padding) starttime = get_starttime(length, nseconds, padding)
test_file_name = "%s_%s_%ssec.%s" % ( test_file_name = f"{os.path.join(dest, filename)}_{starttime}_{nseconds}sec.{extension.replace('.', '')}"
os.path.join(dest, filename), starttime,
nseconds, extension.replace(".", ""))
subprocess.check_output([ subprocess.check_output([
"ffmpeg", "-y", "ffmpeg", "-y",
"-ss", "%d" % starttime, "-ss", f"{starttime}",
'-t' , "%d" % nseconds, '-t', f"{nseconds}",
"-i", audiosource, "-i", audiosource,
test_file_name]) test_file_name])
def log_msg(msg, log=True, silent=False): def log_msg(msg, log=True, silent=False):
if log: if log:
logging.debug(msg) logging.debug(msg)
if not silent: if not silent:
print msg print(msg)
def autolabel(rects, ax): def autolabel(rects, ax):
# attach some text labels # attach some text labels
for rect in rects: for rect in rects:
height = rect.get_height() height = rect.get_height()
ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, f'{int(height)}', ha='center', va='bottom')
'%d' % int(height), ha='center', va='bottom')
def autolabeldoubles(rects, ax): def autolabeldoubles(rects, ax):
# attach some text labels # attach some text labels
for rect in rects: for rect in rects:
height = rect.get_height() height = rect.get_height()
ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, f'{round(float(height), 3)}',
'%s' % round(float(height), 3), ha='center', va='bottom') ha='center', va='bottom')
class DejavuTest(object): class DejavuTest(object):
def __init__(self, folder, seconds): def __init__(self, folder, seconds):
@ -120,35 +130,35 @@ class DejavuTest(object):
self.test_seconds = seconds self.test_seconds = seconds
self.test_songs = [] self.test_songs = []
print "test_seconds", self.test_seconds print("test_seconds", self.test_seconds)
self.test_files = [ self.test_files = [
f for f in os.listdir(self.test_folder) f for f in os.listdir(self.test_folder)
if os.path.isfile(os.path.join(self.test_folder, f)) if os.path.isfile(os.path.join(self.test_folder, f))
and re.findall("[0-9]*sec", f)[0] in self.test_seconds] and re.findall("[0-9]*sec", f)[0] in self.test_seconds]
print "test_files", self.test_files print("test_files", self.test_files)
self.n_columns = len(self.test_seconds) self.n_columns = len(self.test_seconds)
self.n_lines = int(len(self.test_files) / self.n_columns) self.n_lines = int(len(self.test_files) / self.n_columns)
print "columns:", self.n_columns print("columns:", self.n_columns)
print "length of test files:", len(self.test_files) print("length of test files:", len(self.test_files))
print "lines:", self.n_lines print("lines:", self.n_lines)
# variable match results (yes, no, invalid) # variable match results (yes, no, invalid)
self.result_match = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] self.result_match = [[0 for x in range(self.n_columns)] for x in range(self.n_lines)]
print "result_match matrix:", self.result_match print("result_match matrix:", self.result_match)
# variable match precision (if matched in the corrected time) # variable match precision (if matched in the corrected time)
self.result_matching_times = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] self.result_matching_times = [[0 for x in range(self.n_columns)] for x in range(self.n_lines)]
# variable matching time (query time) # variable matching time (query time)
self.result_query_duration = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] self.result_query_duration = [[0 for x in range(self.n_columns)] for x in range(self.n_lines)]
# variable confidence # variable confidence
self.result_match_confidence = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)] self.result_match_confidence = [[0 for x in range(self.n_columns)] for x in range(self.n_lines)]
self.begin() self.begin()
@ -178,19 +188,17 @@ class DejavuTest(object):
# add some # add some
ax.set_ylabel(name) ax.set_ylabel(name)
ax.set_title("%s %s Results" % (self.test_seconds[sec], name)) ax.set_title(f"{self.test_seconds[sec]} {name} Results")
ax.set_xticks(ind + width) ax.set_xticks(ind + width)
labels = [0 for x in range(0, self.n_lines)] labels = [0 for x in range(0, self.n_lines)]
for x in range(0, self.n_lines): for x in range(0, self.n_lines):
labels[x] = "song %s" % (x+1) labels[x] = f"song {x+1}"
ax.set_xticklabels(labels) ax.set_xticklabels(labels)
box = ax.get_position() box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
#ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5))
if name == 'Confidence': if name == 'Confidence':
autolabel(rects1, ax) autolabel(rects1, ax)
else: else:
@ -198,13 +206,13 @@ class DejavuTest(object):
plt.grid() plt.grid()
fig_name = os.path.join(results_folder, "%s_%s.png" % (name, self.test_seconds[sec])) fig_name = os.path.join(results_folder, f"{name}_{self.test_seconds[sec]}.png")
fig.savefig(fig_name) fig.savefig(fig_name)
def begin(self): def begin(self):
for f in self.test_files: for f in self.test_files:
log_msg('--------------------------------------------------') log_msg('--------------------------------------------------')
log_msg('file: %s' % f) log_msg(f'file: {f}')
# get column # get column
col = self.get_column_id(re.findall("[0-9]*sec", f)[0]) col = self.get_column_id(re.findall("[0-9]*sec", f)[0])
@ -235,8 +243,8 @@ class DejavuTest(object):
# which song did we predict? # which song did we predict?
result = ast.literal_eval(result) result = ast.literal_eval(result)
song_result = result["song_name"] song_result = result["song_name"]
log_msg('song: %s' % song) log_msg(f'song: {song}')
log_msg('song_result: %s' % song_result) log_msg(f'song_result: {song_result}')
if song_result != song: if song_result != song:
log_msg('invalid match') log_msg('invalid match')
@ -246,31 +254,28 @@ class DejavuTest(object):
self.result_match_confidence[line][col] = 0 self.result_match_confidence[line][col] = 0
else: else:
log_msg('correct match') log_msg('correct match')
print self.result_match print(self.result_match)
self.result_match[line][col] = 'yes' self.result_match[line][col] = 'yes'
self.result_query_duration[line][col] = round(result[Dejavu.MATCH_TIME],3) self.result_query_duration[line][col] = round(result[Dejavu.MATCH_TIME],3)
self.result_match_confidence[line][col] = result[Dejavu.CONFIDENCE] self.result_match_confidence[line][col] = result[Dejavu.CONFIDENCE]
song_start_time = re.findall("\_[^\_]+",f) song_start_time = re.findall("_[^_]+", f)
song_start_time = song_start_time[0].lstrip("_ ") song_start_time = song_start_time[0].lstrip("_ ")
result_start_time = round((result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE * result_start_time = round((result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE *
DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS), 0) DEFAULT_OVERLAP_RATIO) / DEFAULT_FS, 0)
self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time) self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time)
if (abs(self.result_matching_times[line][col]) == 1): if abs(self.result_matching_times[line][col]) == 1:
self.result_matching_times[line][col] = 0 self.result_matching_times[line][col] = 0
log_msg('query duration: %s' % round(result[Dejavu.MATCH_TIME],3)) log_msg(f'query duration: {round(result[Dejavu.MATCH_TIME], 3)}')
log_msg('confidence: %s' % result[Dejavu.CONFIDENCE]) log_msg(f'confidence: {result[Dejavu.CONFIDENCE]}')
log_msg('song start_time: %s' % song_start_time) log_msg(f'song start_time: {song_start_time}')
log_msg('result start time: %s' % result_start_time) log_msg(f'result start time: {result_start_time}')
if (self.result_matching_times[line][col] == 0):
if self.result_matching_times[line][col] == 0:
log_msg('accurate match') log_msg('accurate match')
else: else:
log_msg('inaccurate match') log_msg('inaccurate match')
log_msg('--------------------------------------------------\n') log_msg('--------------------------------------------------\n')

View file

@ -5,6 +5,7 @@
# Github: github.com/WarrenWeckesser/wavio # Github: github.com/WarrenWeckesser/wavio
import wave as _wave import wave as _wave
import numpy as _np import numpy as _np

View file

@ -1,35 +1,37 @@
import warnings
import json import json
warnings.filterwarnings("ignore") import warnings
from dejavu import Dejavu from dejavu import Dejavu
from dejavu.recognize import FileRecognizer, MicrophoneRecognizer from dejavu.recognize import FileRecognizer, MicrophoneRecognizer
warnings.filterwarnings("ignore")
# load config from a JSON file (or anything outputting a python dictionary) # load config from a JSON file (or anything outputting a python dictionary)
with open("dejavu.cnf.SAMPLE") as f: with open("dejavu.cnf.SAMPLE") as f:
config = json.load(f) config = json.load(f)
if __name__ == '__main__': if __name__ == '__main__':
# create a Dejavu instance # create a Dejavu instance
djv = Dejavu(config) djv = Dejavu(config)
# Fingerprint all the mp3's in the directory we give it # Fingerprint all the mp3's in the directory we give it
djv.fingerprint_directory("mp3", [".mp3"]) djv.fingerprint_directory("mp3", [".mp3"])
# Recognize audio from a file # Recognize audio from a file
song = djv.recognize(FileRecognizer, "mp3/Sean-Fournier--Falling-For-You.mp3") song = djv.recognize(FileRecognizer, "mp3/Sean-Fournier--Falling-For-You.mp3")
print "From file we recognized: %s\n" % song print(f"From file we recognized: {song}\n")
# Or recognize audio from your microphone for `secs` seconds # Or recognize audio from your microphone for `secs` seconds
secs = 5 secs = 5
song = djv.recognize(MicrophoneRecognizer, seconds=secs) song = djv.recognize(MicrophoneRecognizer, seconds=secs)
if song is None: if song is None:
print "Nothing recognized -- did you play the song out loud so your mic could hear it? :)" print("Nothing recognized -- did you play the song out loud so your mic could hear it? :)")
else: else:
print "From mic with %d seconds we recognized: %s\n" % (secs, song) print(f"From mic with %d seconds we recognized: {(secs, song)}\n")
# Or use a recognizer without the shortcut, in anyway you would like # Or use a recognizer without the shortcut, in anyway you would like
recognizer = FileRecognizer(djv) recognizer = FileRecognizer(djv)
song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3") song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3")
print "No shortcut, we recognized: %s\n" % song print(f"No shortcut, we recognized: {song}\n")

View file

@ -1,9 +1,7 @@
# requirements file pydub==0.23.1
PyAudio==0.2.11
numpy==1.17.2
scipy==1.3.1
matplotlib==3.1.1
mysql-connector-python==8.0.17
### BEGIN ###
pydub>=0.9.4
PyAudio>=0.2.7
numpy>=1.8.2
scipy>=0.12.1
matplotlib>=1.3.1
### END ###

View file

@ -86,10 +86,10 @@ tests = 1 # djv
n_secs = len(test_seconds) n_secs = len(test_seconds)
# set result variables -> 4d variables # set result variables -> 4d variables
all_match_counter = [[[0 for x in xrange(tests)] for x in xrange(3)] for x in xrange(n_secs)] all_match_counter = [[[0 for x in range(tests)] for x in range(3)] for x in range(n_secs)]
all_matching_times_counter = [[[0 for x in xrange(tests)] for x in xrange(2)] for x in xrange(n_secs)] all_matching_times_counter = [[[0 for x in range(tests)] for x in range(2)] for x in range(n_secs)]
all_query_duration = [[[0 for x in xrange(tests)] for x in xrange(djv.n_lines)] for x in xrange(n_secs)] all_query_duration = [[[0 for x in range(tests)] for x in range(djv.n_lines)] for x in range(n_secs)]
all_match_confidence = [[[0 for x in xrange(tests)] for x in xrange(djv.n_lines)] for x in xrange(n_secs)] all_match_confidence = [[[0 for x in range(tests)] for x in range(djv.n_lines)] for x in range(n_secs)]
# group results by seconds # group results by seconds
for line in range(0, djv.n_lines): for line in range(0, djv.n_lines):

View file

@ -7,11 +7,11 @@ def parse_requirements(requirements):
with open(requirements) as f: with open(requirements) as f:
lines = [l for l in f] lines = [l for l in f]
# remove spaces # remove spaces
stripped = map((lambda x: x.strip()), lines) stripped = list(map((lambda x: x.strip()), lines))
# remove comments # remove comments
nocomments = filter((lambda x: not x.startswith('#')), stripped) nocomments = list(filter((lambda x: not x.startswith('#')), stripped))
# remove empty lines # remove empty lines
reqs = filter((lambda x: x), nocomments) reqs = list(filter((lambda x: x), nocomments))
return reqs return reqs
PACKAGE_NAME = "PyDejavu" PACKAGE_NAME = "PyDejavu"