Mirror of https://github.com/correl/dejavu.git (synced 2024-11-23 11:09:52 +00:00)

Commit 78dfef04d3 (parent d2b8761eb3): migrated code to Python 3.6.6 and refactored some code to improve it.

18 changed files with 682 additions and 661 deletions
@@ -2,7 +2,7 @@
     "database": {
         "host": "127.0.0.1",
         "user": "root",
-        "passwd": "12345678",
-        "db": "dejavu"
+        "password": "rootpass",
+        "database": "dejavu"
     }
 }
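The renamed keys line up with what the new MySQL handler expects: the config block is passed through Dejavu straight into mysql.connector.connect(), whose keyword arguments are "password" and "database" rather than MySQLdb's "passwd" and "db". A minimal sketch of that path, assuming a config file named dejavu.cnf (the filename is illustrative, not part of this diff):

import json
import mysql.connector

with open("dejavu.cnf") as f:   # hypothetical filename
    config = json.load(f)

# Dejavu does the equivalent of this via its database handler:
conn = mysql.connector.connect(**config["database"])  # host, user, password, database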
@@ -24,7 +24,7 @@ def init(configpath):
         with open(configpath) as f:
             config = json.load(f)
     except IOError as err:
-        print("Cannot open configuration: %s. Exiting" % (str(err)))
+        print(("Cannot open configuration: %s. Exiting" % (str(err))))
         sys.exit(1)

     # create a Dejavu instance

@@ -67,8 +67,8 @@ if __name__ == '__main__':
     if len(args.fingerprint) == 2:
         directory = args.fingerprint[0]
         extension = args.fingerprint[1]
-        print("Fingerprinting all .%s files in the %s directory"
-              % (extension, directory))
+        print(("Fingerprinting all .%s files in the %s directory"
+               % (extension, directory)))
         djv.fingerprint_directory(directory, ["." + extension], 4)

     elif len(args.fingerprint) == 1:
@@ -1,28 +1,23 @@
-from dejavu.database import get_database, Database
-import dejavu.decoder as decoder
-import fingerprint
 import multiprocessing
 import os
-import traceback
 import sys
+import traceback
+
+import dejavu.decoder as decoder
+from dejavu.config.config import (CONFIDENCE, DEFAULT_FS,
+                                  DEFAULT_OVERLAP_RATIO, DEFAULT_WINDOW_SIZE,
+                                  FIELD_FILE_SHA1, OFFSET, OFFSET_SECS,
+                                  SONG_ID, SONG_NAME, TOPN)
+from dejavu.database import get_database
+from dejavu.fingerprint import fingerprint


-class Dejavu(object):
+class Dejavu:

-    SONG_ID = "song_id"
-    SONG_NAME = 'song_name'
-    CONFIDENCE = 'confidence'
-    MATCH_TIME = 'match_time'
-    OFFSET = 'offset'
-    OFFSET_SECS = 'offset_seconds'
-
     def __init__(self, config):
-        super(Dejavu, self).__init__()
-
         self.config = config

         # initialize db
-        db_cls = get_database(config.get("database_type", None))
+        db_cls = get_database(config.get("database_type", "mysql").lower())

         self.db = db_cls(**config.get("database", {}))
         self.db.setup()

@@ -39,7 +34,7 @@ class Dejavu(object):
         self.songs = self.db.get_songs()
         self.songhashes_set = set()  # to know which ones we've computed before
         for song in self.songs:
-            song_hash = song[Database.FIELD_FILE_SHA1]
+            song_hash = song[FIELD_FILE_SHA1]
             self.songhashes_set.add(song_hash)

     def fingerprint_directory(self, path, extensions, nprocesses=None):

@@ -55,26 +50,23 @@ class Dejavu(object):
         filenames_to_fingerprint = []
         for filename, _ in decoder.find_files(path, extensions):

             # don't refingerprint already fingerprinted files
             if decoder.unique_hash(filename) in self.songhashes_set:
-                print "%s already fingerprinted, continuing..." % filename
+                print(f"{filename} already fingerprinted, continuing...")
                 continue

             filenames_to_fingerprint.append(filename)

         # Prepare _fingerprint_worker input
-        worker_input = zip(filenames_to_fingerprint,
-                           [self.limit] * len(filenames_to_fingerprint))
+        worker_input = list(zip(filenames_to_fingerprint, [self.limit] * len(filenames_to_fingerprint)))

         # Send off our tasks
-        iterator = pool.imap_unordered(_fingerprint_worker,
-                                       worker_input)
+        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

         # Loop till we have all of them
         while True:
             try:
-                song_name, hashes, file_hash = iterator.next()
+                song_name, hashes, file_hash = next(iterator)
             except multiprocessing.TimeoutError:
                 continue
             except StopIteration:

@@ -99,7 +91,7 @@ class Dejavu(object):
         song_name = song_name or songname
         # don't refingerprint already fingerprinted files
         if song_hash in self.songhashes_set:
-            print "%s already fingerprinted, continuing..." % song_name
+            print(f"{song_name} already fingerprinted, continuing...")
         else:
             song_name, hashes, file_hash = _fingerprint_worker(
                 filepath,

@@ -112,22 +104,21 @@ class Dejavu(object):
             self.db.set_song_fingerprinted(sid)
             self.get_fingerprinted_songs()

-    def find_matches(self, samples, Fs=fingerprint.DEFAULT_FS):
-        hashes = fingerprint.fingerprint(samples, Fs=Fs)
+    def find_matches(self, samples, Fs=DEFAULT_FS):
+        hashes = fingerprint(samples, Fs=Fs)
         return self.db.return_matches(hashes)

-    def align_matches(self, matches):
+    def align_matches(self, matches, topn=TOPN):
         """
         Finds hash matches that align in time with other matches and finds
         consensus about which hashes are "true" signal from the audio.

-        Returns a dictionary with match information.
+        Returns a list of dictionaries (based on topn) with match information.
         """
         # align by diffs
         diff_counter = {}
-        largest = 0
         largest_count = 0
-        song_id = -1
         for tup in matches:
             sid, diff = tup
             if diff not in diff_counter:

@@ -137,30 +128,65 @@ class Dejavu(object):
             diff_counter[diff][sid] += 1

             if diff_counter[diff][sid] > largest_count:
-                largest = diff
                 largest_count = diff_counter[diff][sid]
-                song_id = sid

-        # extract idenfication
-        song = self.db.get_song_by_id(song_id)
-        if song:
-            # TODO: Clarify what `get_song_by_id` should return.
-            songname = song.get(Dejavu.SONG_NAME, None)
-        else:
-            return None
+        # create dict where keys are song ids
+        songs_num_matches = {}
+        for dc in diff_counter:
+            for sid in diff_counter[dc]:
+                match_val = diff_counter[dc][sid]
+                if (sid not in songs_num_matches) or (match_val > songs_num_matches[sid]['value']):
+                    songs_num_matches[sid] = {
+                        'sid': sid,
+                        'value': match_val,
+                        'largest': dc
+                    }

-        # return match info
-        nseconds = round(float(largest) / fingerprint.DEFAULT_FS *
-                         fingerprint.DEFAULT_WINDOW_SIZE *
-                         fingerprint.DEFAULT_OVERLAP_RATIO, 5)
-        song = {
-            Dejavu.SONG_ID : song_id,
-            Dejavu.SONG_NAME : songname.encode("utf8"),
-            Dejavu.CONFIDENCE : largest_count,
-            Dejavu.OFFSET : int(largest),
-            Dejavu.OFFSET_SECS : nseconds,
-            Database.FIELD_FILE_SHA1 : song.get(Database.FIELD_FILE_SHA1, None).encode("utf8"),}
-        return song
+        # use dict of songs to create an ordered (descending) list using the match value assigned to each song
+        songs_num_matches_list = []
+        for s in songs_num_matches:
+            songs_num_matches_list.append({
+                'sid': s,
+                'object': songs_num_matches[s]
+            })
+
+        songs_num_matches_list_ordered = sorted(songs_num_matches_list, key=lambda x: x['object']['value'],
+                                                reverse=True)
+
+        # iterate the ordered list and fill results
+        songs_result = []
+        for s in songs_num_matches_list_ordered:
+
+            # get the variables expected by the original code
+            song_id = s['object']['sid']
+            largest = s['object']['largest']
+            largest_count = s['object']['value']
+
+            # extract identification
+            song = self.db.get_song_by_id(song_id)
+            if song:
+                # TODO: Clarify what `get_song_by_id` should return.
+                songname = song.get(SONG_NAME, None)
+
+            # return match info
+            nseconds = round(float(largest) / DEFAULT_FS *
+                             DEFAULT_WINDOW_SIZE *
+                             DEFAULT_OVERLAP_RATIO, 5)
+            song = {
+                SONG_ID: song_id,
+                SONG_NAME: songname.encode("utf8"),
+                CONFIDENCE: largest_count,
+                OFFSET: int(largest),
+                OFFSET_SECS: nseconds,
+                FIELD_FILE_SHA1: song.get(FIELD_FILE_SHA1, None).encode("utf8")
+            }
+
+            songs_result.append(song)
+
+            # only consider up to topn elements in the result
+            if len(songs_result) > topn:
+                break
+        return songs_result

     def recognize(self, recognizer, *options, **kwoptions):
         r = recognizer(self)

@@ -177,26 +203,15 @@ def _fingerprint_worker(filename, limit=None, song_name=None):
     songname, extension = os.path.splitext(os.path.basename(filename))
     song_name = song_name or songname
-    channels, Fs, file_hash = decoder.read(filename, limit)
+    channels, fs, file_hash = decoder.read(filename, limit)
     result = set()
     channel_amount = len(channels)

     for channeln, channel in enumerate(channels):
         # TODO: Remove prints or change them into optional logging.
-        print("Fingerprinting channel %d/%d for %s" % (channeln + 1,
-                                                       channel_amount,
-                                                       filename))
-        hashes = fingerprint.fingerprint(channel, Fs=Fs)
-        print("Finished channel %d/%d for %s" % (channeln + 1, channel_amount,
-                                                 filename))
+        print(f"Fingerprinting channel {channeln + 1}/{channel_amount} for {filename}")
+        hashes = fingerprint(channel, Fs=fs)
+        print(f"Finished channel {channeln + 1}/{channel_amount} for {filename}")
         result |= set(hashes)

     return song_name, result, file_hash


-def chunkify(lst, n):
-    """
-    Splits a list into roughly n equal parts.
-    http://stackoverflow.com/questions/2130016/splitting-a-list-of-arbitrary-size-into-only-roughly-n-equal-parts
-    """
-    return [lst[i::n] for i in xrange(n)]
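Taken together, the refactor changes what callers get back: align_matches() now builds an ordered list of up to TOPN candidate matches instead of a single best-match dict. A hedged usage sketch (the audio path is illustrative only):

from dejavu import Dejavu
from dejavu.recognize import FileRecognizer

djv = Dejavu(config)  # config loaded as in the example above
results = djv.recognize(FileRecognizer, "mp3/sample.mp3")

# results is now a list of dicts, best match first
for match in results:
    print(match["song_name"], match["confidence"], match["offset_seconds"])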
dejavu/config/config.py  (new file, 74 lines)
@@ -0,0 +1,74 @@
# Dejavu
SONG_ID = "song_id"
SONG_NAME = 'song_name'
CONFIDENCE = 'confidence'
MATCH_TIME = 'match_time'
OFFSET = 'offset'
OFFSET_SECS = 'offset_seconds'

# DATABASE CLASS INSTANCES:
DATABASES = {
    'mysql': ("dejavu.database_handler.mysql_database", "MySQLDatabase")
}

# TABLE SONGS
SONGS_TABLENAME = "songs"

# SONGS FIELDS
FIELD_SONG_ID = 'song_id'
FIELD_SONGNAME = 'song_name'
FIELD_FINGERPRINTED = "fingerprinted"
FIELD_FILE_SHA1 = 'file_sha1'

# TABLE FINGERPRINTS
FINGERPRINTS_TABLENAME = "fingerprints"

# FINGERPRINTS FIELDS
FIELD_HASH = 'hash'
FIELD_OFFSET = 'offset'

# FINGERPRINTS CONFIG:
# Sampling rate, related to the Nyquist conditions, which affects
# the range frequencies we can detect.
DEFAULT_FS = 44100

# Size of the FFT window, affects frequency granularity
DEFAULT_WINDOW_SIZE = 4096

# Ratio by which each sequential window overlaps the last and the
# next window. Higher overlap will allow a higher granularity of offset
# matching, but potentially more fingerprints.
DEFAULT_OVERLAP_RATIO = 0.5

# Degree to which a fingerprint can be paired with its neighbors --
# higher will cause more fingerprints, but potentially better accuracy.
DEFAULT_FAN_VALUE = 15

# Minimum amplitude in spectrogram in order to be considered a peak.
# This can be raised to reduce number of fingerprints, but can negatively
# affect accuracy.
DEFAULT_AMP_MIN = 10

# Number of cells around an amplitude peak in the spectrogram in order
# for Dejavu to consider it a spectral peak. Higher values mean less
# fingerprints and faster matching, but can potentially affect accuracy.
PEAK_NEIGHBORHOOD_SIZE = 20

# Thresholds on how close or far fingerprints can be in time in order
# to be paired as a fingerprint. If your max is too low, higher values of
# DEFAULT_FAN_VALUE may not perform as expected.
MIN_HASH_TIME_DELTA = 0
MAX_HASH_TIME_DELTA = 200

# If True, will sort peaks temporally for fingerprinting;
# not sorting will cut down number of fingerprints, but potentially
# affect performance.
PEAK_SORT = True

# Number of bits to grab from the front of the SHA1 hash in the
# fingerprint calculation. The more you grab, the more memory storage,
# with potentially lesser collisions of matches.
FINGERPRINT_REDUCTION = 20

# Number of results being returned for file recognition
TOPN = 2
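These constants also determine how a match offset (measured in spectrogram frames) maps back to seconds; the sketch below mirrors the nseconds computation in Dejavu.align_matches and is included only to make that relationship explicit:

from dejavu.config.config import DEFAULT_FS, DEFAULT_OVERLAP_RATIO, DEFAULT_WINDOW_SIZE

def offset_to_seconds(offset):
    # each spectrogram frame advances by DEFAULT_WINDOW_SIZE * DEFAULT_OVERLAP_RATIO samples
    return round(float(offset) / DEFAULT_FS * DEFAULT_WINDOW_SIZE * DEFAULT_OVERLAP_RATIO, 5)

print(offset_to_seconds(100))  # ~4.64 seconds with the defaults above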
@@ -1,22 +1,15 @@
-from __future__ import absolute_import
 import abc
+import importlib
+
+from dejavu.config.config import DATABASES


-class Database(object):
-    __metaclass__ = abc.ABCMeta
-
-    FIELD_FILE_SHA1 = 'file_sha1'
-    FIELD_SONG_ID = 'song_id'
-    FIELD_SONGNAME = 'song_name'
-    FIELD_OFFSET = 'offset'
-    FIELD_HASH = 'hash'
+class Database(object, metaclass=abc.ABCMeta):

     # Name of your Database subclass, this is used in configuration
     # to refer to your class
     type = None

     def __init__(self):
-        super(Database, self).__init__()
+        super().__init__()

     def before_fork(self):
         """

@@ -159,18 +152,11 @@ class Database(object):
     pass


-def get_database(database_type=None):
-    # Default to using the mysql database
-    database_type = database_type or "mysql"
-    # Lower all the input.
-    database_type = database_type.lower()
-
-    for db_cls in Database.__subclasses__():
-        if db_cls.type == database_type:
-            return db_cls
-
-    raise TypeError("Unsupported database type supplied.")
-
-
-# Import our default database handler
-import dejavu.database_sql
+def get_database(database_type="mysql"):
+    path, db_class_name = DATABASES[database_type]
+    try:
+        db_module = importlib.import_module(path)
+        db_class = getattr(db_module, db_class_name)
+        return db_class
+    except ImportError:
+        raise TypeError("Unsupported database type supplied.")
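With this change, get_database() no longer scans Database.__subclasses__(); it resolves the backend lazily through the DATABASES mapping in dejavu/config/config.py. Supporting another backend would then be a matter of adding a mapping entry plus the module it names. A sketch under that assumption (the "postgres" entry and module path are hypothetical, not part of this commit):

# dejavu/config/config.py (hypothetical extension)
DATABASES = {
    'mysql': ("dejavu.database_handler.mysql_database", "MySQLDatabase"),
    'postgres': ("dejavu.database_handler.postgres_database", "PostgreSQLDatabase"),
}

# callers are unchanged:
from dejavu.database import get_database
db_cls = get_database("mysql")  # imported via importlib, returns MySQLDatabase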
dejavu/database_handler/__init__.py  (new file, 0 lines)

dejavu/database_handler/mysql_database.py  (new executable file, 235 lines)
@@ -0,0 +1,235 @@
|
||||||
|
import queue
|
||||||
|
|
||||||
|
import mysql.connector
|
||||||
|
from mysql.connector.errors import DatabaseError
|
||||||
|
|
||||||
|
import dejavu.database_handler.mysql_queries as queries
|
||||||
|
from dejavu.database import Database
|
||||||
|
|
||||||
|
|
||||||
|
class MySQLDatabase(Database):
|
||||||
|
type = "mysql"
|
||||||
|
|
||||||
|
def __init__(self, **options):
|
||||||
|
super().__init__()
|
||||||
|
self.cursor = cursor_factory(**options)
|
||||||
|
self._options = options
|
||||||
|
|
||||||
|
def after_fork(self):
|
||||||
|
# Clear the cursor cache, we don't want any stale connections from
|
||||||
|
# the previous process.
|
||||||
|
Cursor.clear_cache()
|
||||||
|
|
||||||
|
def setup(self):
|
||||||
|
"""
|
||||||
|
Creates any non-existing tables required for dejavu to function.
|
||||||
|
|
||||||
|
This also removes all songs that have been added but have no
|
||||||
|
fingerprints associated with them.
|
||||||
|
"""
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.CREATE_SONGS_TABLE)
|
||||||
|
cur.execute(queries.CREATE_FINGERPRINTS_TABLE)
|
||||||
|
cur.execute(queries.DELETE_UNFINGERPRINTED)
|
||||||
|
|
||||||
|
def empty(self):
|
||||||
|
"""
|
||||||
|
Drops tables created by dejavu and then creates them again
|
||||||
|
by calling `SQLDatabase.setup`.
|
||||||
|
|
||||||
|
.. warning:
|
||||||
|
This will result in a loss of data
|
||||||
|
"""
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.DROP_FINGERPRINTS)
|
||||||
|
cur.execute(queries.DROP_SONGS)
|
||||||
|
|
||||||
|
self.setup()
|
||||||
|
|
||||||
|
def delete_unfingerprinted_songs(self):
|
||||||
|
"""
|
||||||
|
Removes all songs that have no fingerprints associated with them.
|
||||||
|
"""
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.DELETE_UNFINGERPRINTED)
|
||||||
|
|
||||||
|
def get_num_songs(self):
|
||||||
|
"""
|
||||||
|
Returns number of songs the database has fingerprinted.
|
||||||
|
"""
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.SELECT_UNIQUE_SONG_IDS)
|
||||||
|
count = cur.fetchone()[0] if cur.rowcount != 0 else 0
|
||||||
|
|
||||||
|
return count
|
||||||
|
|
||||||
|
def get_num_fingerprints(self):
|
||||||
|
"""
|
||||||
|
Returns number of fingerprints the database has fingerprinted.
|
||||||
|
"""
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.SELECT_NUM_FINGERPRINTS)
|
||||||
|
count = cur.fetchone()[0] if cur.rowcount != 0 else 0
|
||||||
|
cur.close()
|
||||||
|
|
||||||
|
return count
|
||||||
|
|
||||||
|
def set_song_fingerprinted(self, sid):
|
||||||
|
"""
|
||||||
|
Set the fingerprinted flag to TRUE (1) once a song has been completely
|
||||||
|
fingerprinted in the database.
|
||||||
|
"""
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.UPDATE_SONG_FINGERPRINTED, (sid,))
|
||||||
|
|
||||||
|
def get_songs(self):
|
||||||
|
"""
|
||||||
|
Return songs that have the fingerprinted flag set TRUE (1).
|
||||||
|
"""
|
||||||
|
with self.cursor(dictionary=True) as cur:
|
||||||
|
cur.execute(queries.SELECT_SONGS)
|
||||||
|
for row in cur:
|
||||||
|
yield row
|
||||||
|
|
||||||
|
def get_song_by_id(self, sid):
|
||||||
|
"""
|
||||||
|
Returns song by its ID.
|
||||||
|
"""
|
||||||
|
with self.cursor(dictionary=True) as cur:
|
||||||
|
cur.execute(queries.SELECT_SONG, (sid,))
|
||||||
|
return cur.fetchone()
|
||||||
|
|
||||||
|
def insert(self, hash, sid, offset):
|
||||||
|
"""
|
||||||
|
Insert a (sha1, song_id, offset) row into database.
|
||||||
|
"""
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.INSERT_FINGERPRINT, (hash, sid, offset))
|
||||||
|
|
||||||
|
def insert_song(self, song_name, file_hash):
|
||||||
|
"""
|
||||||
|
Inserts song in the database and returns the ID of the inserted record.
|
||||||
|
"""
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.INSERT_SONG, (song_name, file_hash))
|
||||||
|
return cur.lastrowid
|
||||||
|
|
||||||
|
def query(self, hash):
|
||||||
|
"""
|
||||||
|
Return all tuples associated with hash.
|
||||||
|
|
||||||
|
If hash is None, returns all entries in the
|
||||||
|
database (be careful with that one!).
|
||||||
|
"""
|
||||||
|
if hash:
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.SELECT, (hash,))
|
||||||
|
for sid, offset in cur:
|
||||||
|
yield (sid, offset)
|
||||||
|
else: # select all if no key
|
||||||
|
with self.cursor() as cur:
|
||||||
|
cur.execute(queries.SELECT_ALL)
|
||||||
|
for sid, offset in cur:
|
||||||
|
yield (sid, offset)
|
||||||
|
|
||||||
|
def get_iterable_kv_pairs(self):
|
||||||
|
"""
|
||||||
|
Returns all tuples in database.
|
||||||
|
"""
|
||||||
|
return self.query(None)
|
||||||
|
|
||||||
|
def insert_hashes(self, sid, hashes, batch=1000):
|
||||||
|
"""
|
||||||
|
Insert series of hash => song_id, offset
|
||||||
|
values into the database.
|
||||||
|
"""
|
||||||
|
values = [(sid, hash, int(offset)) for hash, offset in hashes]
|
||||||
|
|
||||||
|
with self.cursor() as cur:
|
||||||
|
for index in range(0, len(hashes), batch):
|
||||||
|
cur.executemany(queries.INSERT_FINGERPRINT, values[index: index + batch])
|
||||||
|
|
||||||
|
def return_matches(self, hashes, batch=1000):
|
||||||
|
"""
|
||||||
|
Return the (song_id, offset_diff) tuples associated with
|
||||||
|
a list of (sha1, sample_offset) values.
|
||||||
|
"""
|
||||||
|
# Create a dictionary of hash => offset pairs for later lookups
|
||||||
|
mapper = {}
|
||||||
|
for hash, offset in hashes:
|
||||||
|
mapper[hash.upper()] = offset
|
||||||
|
|
||||||
|
# Get an iterable of all the hashes we need
|
||||||
|
values = list(mapper.keys())
|
||||||
|
|
||||||
|
with self.cursor() as cur:
|
||||||
|
for index in range(0, len(values), batch):
|
||||||
|
# Create our IN part of the query
|
||||||
|
query = queries.SELECT_MULTIPLE
|
||||||
|
query = query % ', '.join(['UNHEX(%s)'] * len(values[index: index + batch]))
|
||||||
|
|
||||||
|
cur.execute(query, values[index: index + batch])
|
||||||
|
|
||||||
|
for hash, sid, offset in cur:
|
||||||
|
# (sid, db_offset - song_sampled_offset)
|
||||||
|
yield (sid, offset - mapper[hash])
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
return self._options,
|
||||||
|
|
||||||
|
def __setstate__(self, state):
|
||||||
|
self._options, = state
|
||||||
|
self.cursor = cursor_factory(**self._options)
|
||||||
|
|
||||||
|
|
||||||
|
def cursor_factory(**factory_options):
|
||||||
|
def cursor(**options):
|
||||||
|
options.update(factory_options)
|
||||||
|
return Cursor(**options)
|
||||||
|
return cursor
|
||||||
|
|
||||||
|
|
||||||
|
class Cursor(object):
|
||||||
|
"""
|
||||||
|
Establishes a connection to the database and returns an open cursor.
|
||||||
|
# Use as context manager
|
||||||
|
with Cursor() as cur:
|
||||||
|
cur.execute(query)
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
def __init__(self, dictionary=False, **options):
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
self._cache = queue.Queue(maxsize=5)
|
||||||
|
|
||||||
|
try:
|
||||||
|
conn = self._cache.get_nowait()
|
||||||
|
# Ping the connection before using it from the cache.
|
||||||
|
conn.ping(True)
|
||||||
|
except queue.Empty:
|
||||||
|
conn = mysql.connector.connect(**options)
|
||||||
|
|
||||||
|
self.conn = conn
|
||||||
|
self.dictionary = dictionary
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def clear_cache(cls):
|
||||||
|
cls._cache = queue.Queue(maxsize=5)
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
self.cursor = self.conn.cursor(dictionary=self.dictionary)
|
||||||
|
return self.cursor
|
||||||
|
|
||||||
|
def __exit__(self, extype, exvalue, traceback):
|
||||||
|
# if we had a MySQL related error we try to rollback the cursor.
|
||||||
|
if extype is DatabaseError:
|
||||||
|
self.cursor.rollback()
|
||||||
|
|
||||||
|
self.cursor.close()
|
||||||
|
self.conn.commit()
|
||||||
|
|
||||||
|
# Put it back on the queue
|
||||||
|
try:
|
||||||
|
self._cache.put_nowait(self.conn)
|
||||||
|
except queue.Full:
|
||||||
|
self.conn.close()
|
126
dejavu/database_handler/mysql_queries.py
Normal file
126
dejavu/database_handler/mysql_queries.py
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
from dejavu.config.config import (FIELD_FILE_SHA1, FIELD_FINGERPRINTED,
|
||||||
|
FIELD_HASH, FIELD_OFFSET, FIELD_SONG_ID,
|
||||||
|
FIELD_SONGNAME, FINGERPRINTS_TABLENAME,
|
||||||
|
SONGS_TABLENAME)
|
||||||
|
|
||||||
|
"""
|
||||||
|
Queries:
|
||||||
|
|
||||||
|
1) Find duplicates (shouldn't be any, though):
|
||||||
|
|
||||||
|
select `hash`, `song_id`, `offset`, count(*) cnt
|
||||||
|
from fingerprints
|
||||||
|
group by `hash`, `song_id`, `offset`
|
||||||
|
having cnt > 1
|
||||||
|
order by cnt asc;
|
||||||
|
|
||||||
|
2) Get number of hashes by song:
|
||||||
|
|
||||||
|
select song_id, song_name, count(song_id) as num
|
||||||
|
from fingerprints
|
||||||
|
natural join songs
|
||||||
|
group by song_id
|
||||||
|
order by count(song_id) desc;
|
||||||
|
|
||||||
|
3) get hashes with highest number of collisions
|
||||||
|
|
||||||
|
select
|
||||||
|
hash,
|
||||||
|
count(distinct song_id) as n
|
||||||
|
from fingerprints
|
||||||
|
group by `hash`
|
||||||
|
order by n DESC;
|
||||||
|
|
||||||
|
=> 26 different songs with same fingerprint (392 times):
|
||||||
|
|
||||||
|
select songs.song_name, fingerprints.offset
|
||||||
|
from fingerprints natural join songs
|
||||||
|
where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73";
|
||||||
|
"""
|
||||||
|
|
||||||
|
# creates
|
||||||
|
CREATE_SONGS_TABLE = f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS `{SONGS_TABLENAME}` (
|
||||||
|
`{FIELD_SONG_ID}` mediumint unsigned not null auto_increment,
|
||||||
|
`{FIELD_SONGNAME}` varchar(250) not null,
|
||||||
|
`{FIELD_FINGERPRINTED}` tinyint default 0,
|
||||||
|
`{FIELD_FILE_SHA1}` binary(20) not null,
|
||||||
|
PRIMARY KEY (`{FIELD_SONG_ID}`),
|
||||||
|
UNIQUE KEY `{FIELD_SONG_ID}` (`{FIELD_SONG_ID}`)
|
||||||
|
) ENGINE=INNODB;"""
|
||||||
|
|
||||||
|
CREATE_FINGERPRINTS_TABLE = f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS `{FINGERPRINTS_TABLENAME}` (
|
||||||
|
`{FIELD_HASH}` binary(10) not null,
|
||||||
|
`{FIELD_SONG_ID}` mediumint unsigned not null,
|
||||||
|
`{FIELD_OFFSET}` int unsigned not null,
|
||||||
|
INDEX ({FIELD_HASH}),
|
||||||
|
UNIQUE KEY `unique_constraint` ({FIELD_SONG_ID}, {FIELD_OFFSET}, {FIELD_HASH}),
|
||||||
|
FOREIGN KEY ({FIELD_SONG_ID}) REFERENCES {SONGS_TABLENAME}({FIELD_SONG_ID}) ON DELETE CASCADE
|
||||||
|
) ENGINE=INNODB;"""
|
||||||
|
|
||||||
|
# inserts (ignores duplicates)
|
||||||
|
INSERT_FINGERPRINT = f"""
|
||||||
|
INSERT IGNORE INTO `{FINGERPRINTS_TABLENAME}` (
|
||||||
|
`{FIELD_SONG_ID}`
|
||||||
|
, `{FIELD_HASH}`
|
||||||
|
, `{FIELD_OFFSET}`)
|
||||||
|
VALUES (%s, UNHEX(%s), %s);
|
||||||
|
"""
|
||||||
|
|
||||||
|
INSERT_SONG = f"""
|
||||||
|
INSERT INTO `{SONGS_TABLENAME}` (`{FIELD_SONGNAME}`,`{FIELD_FILE_SHA1}`)
|
||||||
|
VALUES (%s, UNHEX(%s));
|
||||||
|
"""
|
||||||
|
|
||||||
|
# selects
|
||||||
|
SELECT = f"""
|
||||||
|
SELECT `{FIELD_SONG_ID}`, `{FIELD_OFFSET}`
|
||||||
|
FROM `{FINGERPRINTS_TABLENAME}`
|
||||||
|
WHERE `{FIELD_HASH}` = UNHEX(%s);
|
||||||
|
"""
|
||||||
|
|
||||||
|
SELECT_MULTIPLE = f"""
|
||||||
|
SELECT HEX(`{FIELD_HASH}`), `{FIELD_SONG_ID}`, `{FIELD_OFFSET}`
|
||||||
|
FROM `{FINGERPRINTS_TABLENAME}`
|
||||||
|
WHERE `{FIELD_HASH}` IN (%s);
|
||||||
|
"""
|
||||||
|
|
||||||
|
SELECT_ALL = f"SELECT `{FIELD_SONG_ID}`, `{FIELD_OFFSET}` FROM `{FINGERPRINTS_TABLENAME}`;"
|
||||||
|
|
||||||
|
SELECT_SONG = f"""
|
||||||
|
SELECT `{FIELD_SONGNAME}`, HEX(`{FIELD_FILE_SHA1}`) AS `{FIELD_FILE_SHA1}`
|
||||||
|
FROM `{SONGS_TABLENAME}`
|
||||||
|
WHERE `{FIELD_SONG_ID}` = %s;
|
||||||
|
"""
|
||||||
|
|
||||||
|
SELECT_NUM_FINGERPRINTS = f"SELECT COUNT(*) AS n FROM `{FINGERPRINTS_TABLENAME}`;"
|
||||||
|
|
||||||
|
SELECT_UNIQUE_SONG_IDS = f"""
|
||||||
|
SELECT COUNT(`{FIELD_SONG_ID}`) AS n
|
||||||
|
FROM `{SONGS_TABLENAME}`
|
||||||
|
WHERE `{FIELD_FINGERPRINTED}` = 1;
|
||||||
|
"""
|
||||||
|
|
||||||
|
SELECT_SONGS = f"""
|
||||||
|
SELECT
|
||||||
|
`{FIELD_SONG_ID}`
|
||||||
|
, `{FIELD_SONGNAME}`
|
||||||
|
, HEX(`{FIELD_FILE_SHA1}`) AS `{FIELD_FILE_SHA1}`
|
||||||
|
FROM `{SONGS_TABLENAME}`
|
||||||
|
WHERE `{FIELD_FINGERPRINTED}` = 1;
|
||||||
|
"""
|
||||||
|
|
||||||
|
# drops
|
||||||
|
DROP_FINGERPRINTS = f"DROP TABLE IF EXISTS `{FINGERPRINTS_TABLENAME}`;"
|
||||||
|
DROP_SONGS = f"DROP TABLE IF EXISTS `{SONGS_TABLENAME}`;"
|
||||||
|
|
||||||
|
# update
|
||||||
|
UPDATE_SONG_FINGERPRINTED = f"""
|
||||||
|
UPDATE `{SONGS_TABLENAME}` SET `{FIELD_FINGERPRINTED}` = 1 WHERE `{FIELD_SONG_ID}` = %s;
|
||||||
|
"""
|
||||||
|
|
||||||
|
# delete
|
||||||
|
DELETE_UNFINGERPRINTED = f"""
|
||||||
|
DELETE FROM `{SONGS_TABLENAME}` WHERE `{FIELD_FINGERPRINTED}` = 0;
|
||||||
|
"""
|
|
@ -1,373 +0,0 @@
|
||||||
from __future__ import absolute_import
|
|
||||||
from itertools import izip_longest
|
|
||||||
import Queue
|
|
||||||
|
|
||||||
import MySQLdb as mysql
|
|
||||||
from MySQLdb.cursors import DictCursor
|
|
||||||
|
|
||||||
from dejavu.database import Database
|
|
||||||
|
|
||||||
|
|
||||||
class SQLDatabase(Database):
|
|
||||||
"""
|
|
||||||
Queries:
|
|
||||||
|
|
||||||
1) Find duplicates (shouldn't be any, though):
|
|
||||||
|
|
||||||
select `hash`, `song_id`, `offset`, count(*) cnt
|
|
||||||
from fingerprints
|
|
||||||
group by `hash`, `song_id`, `offset`
|
|
||||||
having cnt > 1
|
|
||||||
order by cnt asc;
|
|
||||||
|
|
||||||
2) Get number of hashes by song:
|
|
||||||
|
|
||||||
select song_id, song_name, count(song_id) as num
|
|
||||||
from fingerprints
|
|
||||||
natural join songs
|
|
||||||
group by song_id
|
|
||||||
order by count(song_id) desc;
|
|
||||||
|
|
||||||
3) get hashes with highest number of collisions
|
|
||||||
|
|
||||||
select
|
|
||||||
hash,
|
|
||||||
count(distinct song_id) as n
|
|
||||||
from fingerprints
|
|
||||||
group by `hash`
|
|
||||||
order by n DESC;
|
|
||||||
|
|
||||||
=> 26 different songs with same fingerprint (392 times):
|
|
||||||
|
|
||||||
select songs.song_name, fingerprints.offset
|
|
||||||
from fingerprints natural join songs
|
|
||||||
where fingerprints.hash = "08d3c833b71c60a7b620322ac0c0aba7bf5a3e73";
|
|
||||||
"""
|
|
||||||
|
|
||||||
type = "mysql"
|
|
||||||
|
|
||||||
# tables
|
|
||||||
FINGERPRINTS_TABLENAME = "fingerprints"
|
|
||||||
SONGS_TABLENAME = "songs"
|
|
||||||
|
|
||||||
# fields
|
|
||||||
FIELD_FINGERPRINTED = "fingerprinted"
|
|
||||||
|
|
||||||
# creates
|
|
||||||
CREATE_FINGERPRINTS_TABLE = """
|
|
||||||
CREATE TABLE IF NOT EXISTS `%s` (
|
|
||||||
`%s` binary(10) not null,
|
|
||||||
`%s` mediumint unsigned not null,
|
|
||||||
`%s` int unsigned not null,
|
|
||||||
INDEX (%s),
|
|
||||||
UNIQUE KEY `unique_constraint` (%s, %s, %s),
|
|
||||||
FOREIGN KEY (%s) REFERENCES %s(%s) ON DELETE CASCADE
|
|
||||||
) ENGINE=INNODB;""" % (
|
|
||||||
FINGERPRINTS_TABLENAME, Database.FIELD_HASH,
|
|
||||||
Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
|
|
||||||
Database.FIELD_SONG_ID, Database.FIELD_OFFSET, Database.FIELD_HASH,
|
|
||||||
Database.FIELD_SONG_ID, SONGS_TABLENAME, Database.FIELD_SONG_ID
|
|
||||||
)
|
|
||||||
|
|
||||||
CREATE_SONGS_TABLE = """
|
|
||||||
CREATE TABLE IF NOT EXISTS `%s` (
|
|
||||||
`%s` mediumint unsigned not null auto_increment,
|
|
||||||
`%s` varchar(250) not null,
|
|
||||||
`%s` tinyint default 0,
|
|
||||||
`%s` binary(20) not null,
|
|
||||||
PRIMARY KEY (`%s`),
|
|
||||||
UNIQUE KEY `%s` (`%s`)
|
|
||||||
) ENGINE=INNODB;""" % (
|
|
||||||
SONGS_TABLENAME, Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, FIELD_FINGERPRINTED,
|
|
||||||
Database.FIELD_FILE_SHA1,
|
|
||||||
Database.FIELD_SONG_ID, Database.FIELD_SONG_ID, Database.FIELD_SONG_ID,
|
|
||||||
)
|
|
||||||
|
|
||||||
# inserts (ignores duplicates)
|
|
||||||
INSERT_FINGERPRINT = """
|
|
||||||
INSERT IGNORE INTO %s (%s, %s, %s) values
|
|
||||||
(UNHEX(%%s), %%s, %%s);
|
|
||||||
""" % (FINGERPRINTS_TABLENAME, Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET)
|
|
||||||
|
|
||||||
INSERT_SONG = "INSERT INTO %s (%s, %s) values (%%s, UNHEX(%%s));" % (
|
|
||||||
SONGS_TABLENAME, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1)
|
|
||||||
|
|
||||||
# selects
|
|
||||||
SELECT = """
|
|
||||||
SELECT %s, %s FROM %s WHERE %s = UNHEX(%%s);
|
|
||||||
""" % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME, Database.FIELD_HASH)
|
|
||||||
|
|
||||||
SELECT_MULTIPLE = """
|
|
||||||
SELECT HEX(%s), %s, %s FROM %s WHERE %s IN (%%s);
|
|
||||||
""" % (Database.FIELD_HASH, Database.FIELD_SONG_ID, Database.FIELD_OFFSET,
|
|
||||||
FINGERPRINTS_TABLENAME, Database.FIELD_HASH)
|
|
||||||
|
|
||||||
SELECT_ALL = """
|
|
||||||
SELECT %s, %s FROM %s;
|
|
||||||
""" % (Database.FIELD_SONG_ID, Database.FIELD_OFFSET, FINGERPRINTS_TABLENAME)
|
|
||||||
|
|
||||||
SELECT_SONG = """
|
|
||||||
SELECT %s, HEX(%s) as %s FROM %s WHERE %s = %%s;
|
|
||||||
""" % (Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1, SONGS_TABLENAME, Database.FIELD_SONG_ID)
|
|
||||||
|
|
||||||
SELECT_NUM_FINGERPRINTS = """
|
|
||||||
SELECT COUNT(*) as n FROM %s
|
|
||||||
""" % (FINGERPRINTS_TABLENAME)
|
|
||||||
|
|
||||||
SELECT_UNIQUE_SONG_IDS = """
|
|
||||||
SELECT COUNT(DISTINCT %s) as n FROM %s WHERE %s = 1;
|
|
||||||
""" % (Database.FIELD_SONG_ID, SONGS_TABLENAME, FIELD_FINGERPRINTED)
|
|
||||||
|
|
||||||
SELECT_SONGS = """
|
|
||||||
SELECT %s, %s, HEX(%s) as %s FROM %s WHERE %s = 1;
|
|
||||||
""" % (Database.FIELD_SONG_ID, Database.FIELD_SONGNAME, Database.FIELD_FILE_SHA1, Database.FIELD_FILE_SHA1,
|
|
||||||
SONGS_TABLENAME, FIELD_FINGERPRINTED)
|
|
||||||
|
|
||||||
# drops
|
|
||||||
DROP_FINGERPRINTS = "DROP TABLE IF EXISTS %s;" % FINGERPRINTS_TABLENAME
|
|
||||||
DROP_SONGS = "DROP TABLE IF EXISTS %s;" % SONGS_TABLENAME
|
|
||||||
|
|
||||||
# update
|
|
||||||
UPDATE_SONG_FINGERPRINTED = """
|
|
||||||
UPDATE %s SET %s = 1 WHERE %s = %%s
|
|
||||||
""" % (SONGS_TABLENAME, FIELD_FINGERPRINTED, Database.FIELD_SONG_ID)
|
|
||||||
|
|
||||||
# delete
|
|
||||||
DELETE_UNFINGERPRINTED = """
|
|
||||||
DELETE FROM %s WHERE %s = 0;
|
|
||||||
""" % (SONGS_TABLENAME, FIELD_FINGERPRINTED)
|
|
||||||
|
|
||||||
def __init__(self, **options):
|
|
||||||
super(SQLDatabase, self).__init__()
|
|
||||||
self.cursor = cursor_factory(**options)
|
|
||||||
self._options = options
|
|
||||||
|
|
||||||
def after_fork(self):
|
|
||||||
# Clear the cursor cache, we don't want any stale connections from
|
|
||||||
# the previous process.
|
|
||||||
Cursor.clear_cache()
|
|
||||||
|
|
||||||
def setup(self):
|
|
||||||
"""
|
|
||||||
Creates any non-existing tables required for dejavu to function.
|
|
||||||
|
|
||||||
This also removes all songs that have been added but have no
|
|
||||||
fingerprints associated with them.
|
|
||||||
"""
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
cur.execute(self.CREATE_SONGS_TABLE)
|
|
||||||
cur.execute(self.CREATE_FINGERPRINTS_TABLE)
|
|
||||||
cur.execute(self.DELETE_UNFINGERPRINTED)
|
|
||||||
|
|
||||||
def empty(self):
|
|
||||||
"""
|
|
||||||
Drops tables created by dejavu and then creates them again
|
|
||||||
by calling `SQLDatabase.setup`.
|
|
||||||
|
|
||||||
.. warning:
|
|
||||||
This will result in a loss of data
|
|
||||||
"""
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
cur.execute(self.DROP_FINGERPRINTS)
|
|
||||||
cur.execute(self.DROP_SONGS)
|
|
||||||
|
|
||||||
self.setup()
|
|
||||||
|
|
||||||
def delete_unfingerprinted_songs(self):
|
|
||||||
"""
|
|
||||||
Removes all songs that have no fingerprints associated with them.
|
|
||||||
"""
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
cur.execute(self.DELETE_UNFINGERPRINTED)
|
|
||||||
|
|
||||||
def get_num_songs(self):
|
|
||||||
"""
|
|
||||||
Returns number of songs the database has fingerprinted.
|
|
||||||
"""
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
cur.execute(self.SELECT_UNIQUE_SONG_IDS)
|
|
||||||
|
|
||||||
for count, in cur:
|
|
||||||
return count
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def get_num_fingerprints(self):
|
|
||||||
"""
|
|
||||||
Returns number of fingerprints the database has fingerprinted.
|
|
||||||
"""
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
cur.execute(self.SELECT_NUM_FINGERPRINTS)
|
|
||||||
|
|
||||||
for count, in cur:
|
|
||||||
return count
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def set_song_fingerprinted(self, sid):
|
|
||||||
"""
|
|
||||||
Set the fingerprinted flag to TRUE (1) once a song has been completely
|
|
||||||
fingerprinted in the database.
|
|
||||||
"""
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
cur.execute(self.UPDATE_SONG_FINGERPRINTED, (sid,))
|
|
||||||
|
|
||||||
def get_songs(self):
|
|
||||||
"""
|
|
||||||
Return songs that have the fingerprinted flag set TRUE (1).
|
|
||||||
"""
|
|
||||||
with self.cursor(cursor_type=DictCursor, charset="utf8") as cur:
|
|
||||||
cur.execute(self.SELECT_SONGS)
|
|
||||||
for row in cur:
|
|
||||||
yield row
|
|
||||||
|
|
||||||
def get_song_by_id(self, sid):
|
|
||||||
"""
|
|
||||||
Returns song by its ID.
|
|
||||||
"""
|
|
||||||
with self.cursor(cursor_type=DictCursor, charset="utf8") as cur:
|
|
||||||
cur.execute(self.SELECT_SONG, (sid,))
|
|
||||||
return cur.fetchone()
|
|
||||||
|
|
||||||
def insert(self, hash, sid, offset):
|
|
||||||
"""
|
|
||||||
Insert a (sha1, song_id, offset) row into database.
|
|
||||||
"""
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
cur.execute(self.INSERT_FINGERPRINT, (hash, sid, offset))
|
|
||||||
|
|
||||||
def insert_song(self, songname, file_hash):
|
|
||||||
"""
|
|
||||||
Inserts song in the database and returns the ID of the inserted record.
|
|
||||||
"""
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
cur.execute(self.INSERT_SONG, (songname, file_hash))
|
|
||||||
return cur.lastrowid
|
|
||||||
|
|
||||||
def query(self, hash):
|
|
||||||
"""
|
|
||||||
Return all tuples associated with hash.
|
|
||||||
|
|
||||||
If hash is None, returns all entries in the
|
|
||||||
database (be careful with that one!).
|
|
||||||
"""
|
|
||||||
# select all if no key
|
|
||||||
query = self.SELECT_ALL if hash is None else self.SELECT
|
|
||||||
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
cur.execute(query)
|
|
||||||
for sid, offset in cur:
|
|
||||||
yield (sid, offset)
|
|
||||||
|
|
||||||
def get_iterable_kv_pairs(self):
|
|
||||||
"""
|
|
||||||
Returns all tuples in database.
|
|
||||||
"""
|
|
||||||
return self.query(None)
|
|
||||||
|
|
||||||
def insert_hashes(self, sid, hashes):
|
|
||||||
"""
|
|
||||||
Insert series of hash => song_id, offset
|
|
||||||
values into the database.
|
|
||||||
"""
|
|
||||||
values = []
|
|
||||||
for hash, offset in hashes:
|
|
||||||
values.append((hash, sid, offset))
|
|
||||||
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
for split_values in grouper(values, 1000):
|
|
||||||
cur.executemany(self.INSERT_FINGERPRINT, split_values)
|
|
||||||
|
|
||||||
def return_matches(self, hashes):
|
|
||||||
"""
|
|
||||||
Return the (song_id, offset_diff) tuples associated with
|
|
||||||
a list of (sha1, sample_offset) values.
|
|
||||||
"""
|
|
||||||
# Create a dictionary of hash => offset pairs for later lookups
|
|
||||||
mapper = {}
|
|
||||||
for hash, offset in hashes:
|
|
||||||
mapper[hash.upper()] = offset
|
|
||||||
|
|
||||||
# Get an iteratable of all the hashes we need
|
|
||||||
values = mapper.keys()
|
|
||||||
|
|
||||||
with self.cursor(charset="utf8") as cur:
|
|
||||||
for split_values in grouper(values, 1000):
|
|
||||||
# Create our IN part of the query
|
|
||||||
query = self.SELECT_MULTIPLE
|
|
||||||
query = query % ', '.join(['UNHEX(%s)'] * len(split_values))
|
|
||||||
|
|
||||||
cur.execute(query, split_values)
|
|
||||||
|
|
||||||
for hash, sid, offset in cur:
|
|
||||||
# (sid, db_offset - song_sampled_offset)
|
|
||||||
yield (sid, offset - mapper[hash])
|
|
||||||
|
|
||||||
def __getstate__(self):
|
|
||||||
return (self._options,)
|
|
||||||
|
|
||||||
def __setstate__(self, state):
|
|
||||||
self._options, = state
|
|
||||||
self.cursor = cursor_factory(**self._options)
|
|
||||||
|
|
||||||
|
|
||||||
def grouper(iterable, n, fillvalue=None):
|
|
||||||
args = [iter(iterable)] * n
|
|
||||||
return (filter(None, values) for values
|
|
||||||
in izip_longest(fillvalue=fillvalue, *args))
|
|
||||||
|
|
||||||
|
|
||||||
def cursor_factory(**factory_options):
|
|
||||||
def cursor(**options):
|
|
||||||
options.update(factory_options)
|
|
||||||
return Cursor(**options)
|
|
||||||
return cursor
|
|
||||||
|
|
||||||
|
|
||||||
class Cursor(object):
|
|
||||||
"""
|
|
||||||
Establishes a connection to the database and returns an open cursor.
|
|
||||||
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Use as context manager
|
|
||||||
with Cursor() as cur:
|
|
||||||
cur.execute(query)
|
|
||||||
```
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, cursor_type=mysql.cursors.Cursor, **options):
|
|
||||||
super(Cursor, self).__init__()
|
|
||||||
|
|
||||||
self._cache = Queue.Queue(maxsize=5)
|
|
||||||
try:
|
|
||||||
conn = self._cache.get_nowait()
|
|
||||||
except Queue.Empty:
|
|
||||||
conn = mysql.connect(**options)
|
|
||||||
else:
|
|
||||||
# Ping the connection before using it from the cache.
|
|
||||||
conn.ping(True)
|
|
||||||
|
|
||||||
self.conn = conn
|
|
||||||
self.conn.autocommit(False)
|
|
||||||
self.cursor_type = cursor_type
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def clear_cache(cls):
|
|
||||||
cls._cache = Queue.Queue(maxsize=5)
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
self.cursor = self.conn.cursor(self.cursor_type)
|
|
||||||
return self.cursor
|
|
||||||
|
|
||||||
def __exit__(self, extype, exvalue, traceback):
|
|
||||||
# if we had a MySQL related error we try to rollback the cursor.
|
|
||||||
if extype is mysql.MySQLError:
|
|
||||||
self.cursor.rollback()
|
|
||||||
|
|
||||||
self.cursor.close()
|
|
||||||
self.conn.commit()
|
|
||||||
|
|
||||||
# Put it back on the queue
|
|
||||||
try:
|
|
||||||
self._cache.put_nowait(self.conn)
|
|
||||||
except Queue.Full:
|
|
||||||
self.conn.close()
|
|
|
@@ -3,9 +3,10 @@ import fnmatch
 import numpy as np
 from pydub import AudioSegment
 from pydub.utils import audioop
-import wavio
+from . import wavio
 from hashlib import sha1


 def unique_hash(filepath, blocksize=2**20):
     """ Small function to generate a hash to uniquely generate
     a file. Inspired by MD5 version here:

@@ -14,7 +15,7 @@ def unique_hash(filepath, blocksize=2**20):
     Works with large files.
     """
     s = sha1()
-    with open(filepath , "rb") as f:
+    with open(filepath, "rb") as f:
         while True:
             buf = f.read(blocksize)
             if not buf:

@@ -29,7 +30,7 @@ def find_files(path, extensions):

     for dirpath, dirnames, files in os.walk(path):
         for extension in extensions:
-            for f in fnmatch.filter(files, "*.%s" % extension):
+            for f in fnmatch.filter(files, f"*.{extension}"):
                 p = os.path.join(dirpath, f)
                 yield (p, extension)

@@ -53,15 +54,15 @@ def read(filename, limit=None):
         if limit:
             audiofile = audiofile[:limit * 1000]

-        data = np.fromstring(audiofile._data, np.int16)
+        data = np.fromstring(audiofile.raw_data, np.int16)

         channels = []
-        for chn in xrange(audiofile.channels):
+        for chn in range(audiofile.channels):
             channels.append(data[chn::audiofile.channels])

-        fs = audiofile.frame_rate
+        audiofile.frame_rate
     except audioop.error:
-        fs, _, audiofile = wavio.readwav(filename)
+        _, _, audiofile = wavio.readwav(filename)

         if limit:
             audiofile = audiofile[:limit * 1000]
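For reference, the channel split kept in decoder.read() relies on pydub returning interleaved 16-bit samples, so a strided slice per channel separates them. A small self-contained illustration with made-up sample values:

import numpy as np

data = np.array([1, 100, 2, 200, 3, 300], dtype=np.int16)  # L, R, L, R, ...
channels = [data[chn::2] for chn in range(2)]
# channels[0] -> [1, 2, 3] (left), channels[1] -> [100, 200, 300] (right)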
@@ -1,74 +1,32 @@
-import numpy as np
-import matplotlib.mlab as mlab
-import matplotlib.pyplot as plt
-from scipy.ndimage.filters import maximum_filter
-from scipy.ndimage.morphology import (generate_binary_structure,
-                                      iterate_structure, binary_erosion)
 import hashlib
 from operator import itemgetter

+import matplotlib.mlab as mlab
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.ndimage.filters import maximum_filter
+from scipy.ndimage.morphology import (binary_erosion,
+                                      generate_binary_structure,
+                                      iterate_structure)
+
+from dejavu.config.config import (DEFAULT_AMP_MIN, DEFAULT_FAN_VALUE,
+                                  DEFAULT_FS, DEFAULT_OVERLAP_RATIO,
+                                  DEFAULT_WINDOW_SIZE, FINGERPRINT_REDUCTION,
+                                  MAX_HASH_TIME_DELTA, MIN_HASH_TIME_DELTA,
+                                  PEAK_NEIGHBORHOOD_SIZE, PEAK_SORT)
+
 IDX_FREQ_I = 0
 IDX_TIME_J = 1

-######################################################################
-# Sampling rate, related to the Nyquist conditions, which affects
-# the range frequencies we can detect.
-DEFAULT_FS = 44100
-
-######################################################################
-# Size of the FFT window, affects frequency granularity
-DEFAULT_WINDOW_SIZE = 4096
-
-######################################################################
-# Ratio by which each sequential window overlaps the last and the
-# next window. Higher overlap will allow a higher granularity of offset
-# matching, but potentially more fingerprints.
-DEFAULT_OVERLAP_RATIO = 0.5
-
-######################################################################
-# Degree to which a fingerprint can be paired with its neighbors --
-# higher will cause more fingerprints, but potentially better accuracy.
-DEFAULT_FAN_VALUE = 15
-
-######################################################################
-# Minimum amplitude in spectrogram in order to be considered a peak.
-# This can be raised to reduce number of fingerprints, but can negatively
-# affect accuracy.
-DEFAULT_AMP_MIN = 10
-
-######################################################################
-# Number of cells around an amplitude peak in the spectrogram in order
-# for Dejavu to consider it a spectral peak. Higher values mean less
-# fingerprints and faster matching, but can potentially affect accuracy.
-PEAK_NEIGHBORHOOD_SIZE = 20
-
-######################################################################
-# Thresholds on how close or far fingerprints can be in time in order
-# to be paired as a fingerprint. If your max is too low, higher values of
-# DEFAULT_FAN_VALUE may not perform as expected.
-MIN_HASH_TIME_DELTA = 0
-MAX_HASH_TIME_DELTA = 200
-
-######################################################################
-# If True, will sort peaks temporally for fingerprinting;
-# not sorting will cut down number of fingerprints, but potentially
-# affect performance.
-PEAK_SORT = True
-
-######################################################################
-# Number of bits to grab from the front of the SHA1 hash in the
-# fingerprint calculation. The more you grab, the more memory storage,
-# with potentially lesser collisions of matches.
-FINGERPRINT_REDUCTION = 20
-
-def fingerprint(channel_samples, Fs=DEFAULT_FS,
+
+def fingerprint(channel_samples,
+                Fs=DEFAULT_FS,
                 wsize=DEFAULT_WINDOW_SIZE,
                 wratio=DEFAULT_OVERLAP_RATIO,
                 fan_value=DEFAULT_FAN_VALUE,
                 amp_min=DEFAULT_AMP_MIN):
     """
-    FFT the channel, log transform output, find local maxima, then return
-    locally sensitive hashes.
+    FFT the channel, log transform output, find local maxima, then return locally sensitive hashes.
     """
     # FFT the signal and extract frequency components
     arr2D = mlab.specgram(

@@ -78,11 +36,9 @@ def fingerprint(channel_samples, Fs=DEFAULT_FS,
         window=mlab.window_hanning,
         noverlap=int(wsize * wratio))[0]

-    # apply log transform since specgram() returns linear array
-    arr2D = 10 * np.log10(arr2D)
-    arr2D[arr2D == -np.inf] = 0  # replace infs with zeros
+    # Apply log transform since specgram() returns linear array. 0s are excluded to avoid np warning.
+    arr2D = 10 * np.log10(arr2D, out=np.zeros_like(arr2D), where=(arr2D != 0))

     # find local maxima
     local_maxima = get_2D_peaks(arr2D, plot=False, amp_min=amp_min)

     # return hashes

@@ -97,39 +53,35 @@ def get_2D_peaks(arr2D, plot=False, amp_min=DEFAULT_AMP_MIN):
     # find local maxima using our filter shape
     local_max = maximum_filter(arr2D, footprint=neighborhood) == arr2D
     background = (arr2D == 0)
-    eroded_background = binary_erosion(background, structure=neighborhood,
-                                       border_value=1)
+    eroded_background = binary_erosion(background, structure=neighborhood, border_value=1)

     # Boolean mask of arr2D with True at peaks (Fixed deprecated boolean operator by changing '-' to '^')
     detected_peaks = local_max ^ eroded_background

     # extract peaks
     amps = arr2D[detected_peaks]
-    j, i = np.where(detected_peaks)
+    freqs, times = np.where(detected_peaks)

     # filter peaks
     amps = amps.flatten()
-    peaks = zip(i, j, amps)
-    peaks_filtered = filter(lambda x: x[2]>amp_min, peaks)  # freq, time, amp
+
     # get indices for frequency and time
-    frequency_idx = []
-    time_idx = []
-    for x in peaks_filtered:
-        frequency_idx.append(x[1])
-        time_idx.append(x[0])
+    filter_idxs = np.where(amps > amp_min)
+
+    freqs_filter = freqs[filter_idxs]
+    times_filter = times[filter_idxs]

     if plot:
         # scatter of the peaks
         fig, ax = plt.subplots()
         ax.imshow(arr2D)
-        ax.scatter(time_idx, frequency_idx)
+        ax.scatter(times_filter, freqs_filter)
         ax.set_xlabel('Time')
         ax.set_ylabel('Frequency')
         ax.set_title("Spectrogram")
         plt.gca().invert_yaxis()
         plt.show()

-    return zip(frequency_idx, time_idx)
+    return list(zip(freqs_filter, times_filter))


 def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):

@@ -151,7 +103,6 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
                 t2 = peaks[i + j][IDX_TIME_J]
                 t_delta = t2 - t1

-                if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA:
-                    h = hashlib.sha1(
-                        "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta)))
+                if MIN_HASH_TIME_DELTA <= t_delta <= MAX_HASH_TIME_DELTA:
+                    h = hashlib.sha1(f"{str(freq1)}|{str(freq2)}|{str(t_delta)}".encode('utf-8'))
                     yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)
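The reworked get_2D_peaks() replaces the Python-level zip/filter loop with NumPy indexing. A standalone sketch of that pattern, using made-up peak data rather than a real spectrogram:

import numpy as np

amps = np.array([5.0, 12.0, 3.0, 40.0])   # amplitude at each detected peak
freqs = np.array([10, 22, 31, 47])        # frequency bin of each peak
times = np.array([3, 8, 15, 21])          # time bin of each peak

filter_idxs = np.where(amps > 10)         # amp_min = 10, as in DEFAULT_AMP_MIN
peaks = list(zip(freqs[filter_idxs], times[filter_idxs]))
# -> [(22, 8), (47, 21)]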
@ -1,16 +1,16 @@
|
||||||
# encoding: utf-8
|
import time
|
||||||
import dejavu.fingerprint as fingerprint
|
|
||||||
import dejavu.decoder as decoder
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyaudio
|
import pyaudio
|
||||||
import time
|
|
||||||
|
import dejavu.decoder as decoder
|
||||||
|
from dejavu.config.config import DEFAULT_FS
|
||||||
|
|
||||||
|
|
||||||
class BaseRecognizer(object):
|
class BaseRecognizer(object):
|
||||||
|
|
||||||
def __init__(self, dejavu):
|
def __init__(self, dejavu):
|
||||||
self.dejavu = dejavu
|
self.dejavu = dejavu
|
||||||
self.Fs = fingerprint.DEFAULT_FS
|
self.Fs = DEFAULT_FS
|
||||||
|
|
||||||
def _recognize(self, *data):
|
def _recognize(self, *data):
|
||||||
matches = []
|
matches = []
|
||||||
|
@ -24,32 +24,32 @@ class BaseRecognizer(object):
|
||||||
|
|
||||||
class FileRecognizer(BaseRecognizer):
|
class FileRecognizer(BaseRecognizer):
|
||||||
def __init__(self, dejavu):
|
def __init__(self, dejavu):
|
||||||
super(FileRecognizer, self).__init__(dejavu)
|
super().__init__(dejavu)
|
||||||
|
|
||||||
def recognize_file(self, filename):
|
def recognize_file(self, filename):
|
||||||
frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit)
|
frames, self.Fs, file_hash = decoder.read(filename, self.dejavu.limit)
|
||||||
|
|
||||||
t = time.time()
|
t = time.time()
|
||||||
match = self._recognize(*frames)
|
matches = self._recognize(*frames)
|
||||||
t = time.time() - t
|
t = time.time() - t
|
||||||
|
|
||||||
if match:
|
for match in matches:
|
||||||
match['match_time'] = t
|
match['match_time'] = t
|
||||||
|
|
||||||
return match
|
return matches
|
||||||
|
|
||||||
def recognize(self, filename):
|
def recognize(self, filename):
|
||||||
return self.recognize_file(filename)
|
return self.recognize_file(filename)
|
||||||
|
|
||||||
|
|
||||||
class MicrophoneRecognizer(BaseRecognizer):
|
class MicrophoneRecognizer(BaseRecognizer):
|
||||||
default_chunksize = 8192
|
default_chunksize = 8192
|
||||||
default_format = pyaudio.paInt16
|
default_format = pyaudio.paInt16
|
||||||
default_channels = 2
|
default_channels = 2
|
||||||
default_samplerate = 44100
|
default_samplerate = 44100
|
||||||
|
|
||||||
def __init__(self, dejavu):
|
def __init__(self, dejavu):
|
||||||
super(MicrophoneRecognizer, self).__init__(dejavu)
|
super().__init__(dejavu)
|
||||||
self.audio = pyaudio.PyAudio()
|
self.audio = pyaudio.PyAudio()
|
||||||
self.stream = None
|
self.stream = None
|
||||||
self.data = []
|
self.data = []
|
||||||
|
|
|
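A small sketch of how the recognizer's return value is consumed after this change: _recognize() now yields a list of match dicts, each of which gets stamped with the elapsed query time. The fake_recognize stand-in and its data are hypothetical; only the timing and stamping pattern mirrors the diff.

import time

def fake_recognize(*frames):
    # stand-in for BaseRecognizer._recognize(); returns illustrative matches
    return [{"song_name": "example_song", "confidence": 42}]

frames = [b"", b""]             # placeholder audio channel data
t = time.time()
matches = fake_recognize(*frames)
t = time.time() - t

for match in matches:           # stamp every candidate match with the query time
    match['match_time'] = t

print(matches)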
@@ -1,14 +1,19 @@
|
||||||
from __future__ import division
|
|
||||||
from pydub import AudioSegment
|
import ast
|
||||||
from dejavu.decoder import path_to_songname
|
|
||||||
from dejavu import Dejavu
|
|
||||||
from dejavu.fingerprint import *
|
|
||||||
import traceback
|
|
||||||
import fnmatch
|
import fnmatch
|
||||||
import os, re, ast
|
|
||||||
import subprocess
|
|
||||||
import random
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
from pydub import AudioSegment
|
||||||
|
|
||||||
|
from dejavu import Dejavu
|
||||||
|
from dejavu.decoder import path_to_songname
|
||||||
|
from dejavu.fingerprint import *
|
||||||
|
|
||||||
|
|
||||||
def set_seed(seed=None):
|
def set_seed(seed=None):
|
||||||
"""
|
"""
|
||||||
|
@@ -20,6 +25,7 @@ def set_seed(seed=None):
|
||||||
if seed != None:
|
if seed != None:
|
||||||
random.seed(seed)
|
random.seed(seed)
|
||||||
|
|
||||||
|
|
||||||
def get_files_recursive(src, fmt):
|
def get_files_recursive(src, fmt):
|
||||||
"""
|
"""
|
||||||
`src` is the source directory.
|
`src` is the source directory.
|
||||||
|
@@ -29,6 +35,7 @@ def get_files_recursive(src, fmt):
|
||||||
for filename in fnmatch.filter(filenames, '*' + fmt):
|
for filename in fnmatch.filter(filenames, '*' + fmt):
|
||||||
yield os.path.join(root, filename)
|
yield os.path.join(root, filename)
|
||||||
|
|
||||||
|
|
||||||
def get_length_audio(audiopath, extension):
|
def get_length_audio(audiopath, extension):
|
||||||
"""
|
"""
|
||||||
Returns length of audio in seconds.
|
Returns length of audio in seconds.
|
||||||
|
@@ -37,10 +44,11 @@ def get_length_audio(audiopath, extension):
|
||||||
try:
|
try:
|
||||||
audio = AudioSegment.from_file(audiopath, extension.replace(".", ""))
|
audio = AudioSegment.from_file(audiopath, extension.replace(".", ""))
|
||||||
except:
|
except:
|
||||||
print "Error in get_length_audio(): %s" % traceback.format_exc()
|
print(f"Error in get_length_audio(): {traceback.format_exc()}")
|
||||||
return None
|
return None
|
||||||
return int(len(audio) / 1000.0)
|
return int(len(audio) / 1000.0)
|
||||||
|
|
||||||
|
|
||||||
def get_starttime(length, nseconds, padding):
|
def get_starttime(length, nseconds, padding):
|
||||||
"""
|
"""
|
||||||
`length` is total audio length in seconds
|
`length` is total audio length in seconds
|
||||||
|
@@ -52,6 +60,7 @@ def get_starttime(length, nseconds, padding):
|
||||||
return 0
|
return 0
|
||||||
return random.randint(padding, maximum)
|
return random.randint(padding, maximum)
|
||||||
|
|
||||||
|
|
||||||
def generate_test_files(src, dest, nseconds, fmts=[".mp3", ".wav"], padding=10):
|
def generate_test_files(src, dest, nseconds, fmts=[".mp3", ".wav"], padding=10):
|
||||||
"""
|
"""
|
||||||
Generates a test file for each file recursively in `src` directory
|
Generates a test file for each file recursively in `src` directory
|
||||||
|
@@ -75,42 +84,43 @@ def generate_test_files(src, dest, nseconds, fmts=[".mp3", ".wav"], padding=10):
|
||||||
testsources = get_files_recursive(src, fmt)
|
testsources = get_files_recursive(src, fmt)
|
||||||
for audiosource in testsources:
|
for audiosource in testsources:
|
||||||
|
|
||||||
print "audiosource:", audiosource
|
print("audiosource:", audiosource)
|
||||||
|
|
||||||
filename, extension = os.path.splitext(os.path.basename(audiosource))
|
filename, extension = os.path.splitext(os.path.basename(audiosource))
|
||||||
length = get_length_audio(audiosource, extension)
|
length = get_length_audio(audiosource, extension)
|
||||||
starttime = get_starttime(length, nseconds, padding)
|
starttime = get_starttime(length, nseconds, padding)
|
||||||
|
|
||||||
test_file_name = "%s_%s_%ssec.%s" % (
|
test_file_name = f"{os.path.join(dest, filename)}_{starttime}_{nseconds}sec.{extension.replace('.', '')}"
|
||||||
os.path.join(dest, filename), starttime,
|
|
||||||
nseconds, extension.replace(".", ""))
|
|
||||||
|
|
||||||
subprocess.check_output([
|
subprocess.check_output([
|
||||||
"ffmpeg", "-y",
|
"ffmpeg", "-y",
|
||||||
"-ss", "%d" % starttime,
|
"-ss", f"{starttime}",
|
||||||
'-t' , "%d" % nseconds,
|
'-t', f"{nseconds}",
|
||||||
"-i", audiosource,
|
"-i", audiosource,
|
||||||
test_file_name])
|
test_file_name])
|
||||||
|
|
||||||
|
|
||||||
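The ffmpeg call above can be read as a standalone helper that cuts an nseconds clip starting at starttime from a source file; the sketch below restates it under that assumption, with an illustrative (hypothetical) helper name and paths.

import os
import subprocess

def cut_clip(audiosource, dest, starttime, nseconds):
    filename, extension = os.path.splitext(os.path.basename(audiosource))
    test_file_name = (f"{os.path.join(dest, filename)}_{starttime}_"
                      f"{nseconds}sec{extension}")
    subprocess.check_output([
        "ffmpeg", "-y",            # overwrite the output file without asking
        "-ss", f"{starttime}",     # seek to the clip start (seconds)
        "-t", f"{nseconds}",       # clip duration (seconds)
        "-i", audiosource,
        test_file_name])
    return test_file_name

# example (requires ffmpeg on PATH):
# cut_clip("mp3/some_song.mp3", "test", starttime=30, nseconds=10)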
def log_msg(msg, log=True, silent=False):
|
def log_msg(msg, log=True, silent=False):
|
||||||
if log:
|
if log:
|
||||||
logging.debug(msg)
|
logging.debug(msg)
|
||||||
if not silent:
|
if not silent:
|
||||||
print msg
|
print(msg)
|
||||||
|
|
||||||
|
|
||||||
def autolabel(rects, ax):
|
def autolabel(rects, ax):
|
||||||
# attach some text labels
|
# attach some text labels
|
||||||
for rect in rects:
|
for rect in rects:
|
||||||
height = rect.get_height()
|
height = rect.get_height()
|
||||||
ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
|
ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, f'{int(height)}', ha='center', va='bottom')
|
||||||
'%d' % int(height), ha='center', va='bottom')
|
|
||||||
|
|
||||||
def autolabeldoubles(rects, ax):
|
def autolabeldoubles(rects, ax):
|
||||||
# attach some text labels
|
# attach some text labels
|
||||||
for rect in rects:
|
for rect in rects:
|
||||||
height = rect.get_height()
|
height = rect.get_height()
|
||||||
ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
|
ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, f'{round(float(height), 3)}',
|
||||||
'%s' % round(float(height), 3), ha='center', va='bottom')
|
ha='center', va='bottom')
|
||||||
|
|
||||||
|
|
||||||
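A brief sketch of how autolabel() is typically applied: annotate every bar of a matplotlib bar chart with its integer height. The chart data and the output filename are illustrative.

import matplotlib
matplotlib.use("Agg")            # render off-screen so the sketch runs headless
import matplotlib.pyplot as plt

def autolabel(rects, ax):
    # attach a text label with the bar height above each bar
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                f'{int(height)}', ha='center', va='bottom')

fig, ax = plt.subplots()
rects = ax.bar([0, 1, 2], [12, 7, 19], width=0.35)
autolabel(rects, ax)
fig.savefig("bar_labels_example.png")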
class DejavuTest(object):
|
class DejavuTest(object):
|
||||||
def __init__(self, folder, seconds):
|
def __init__(self, folder, seconds):
|
||||||
|
@@ -120,35 +130,35 @@ class DejavuTest(object):
|
||||||
self.test_seconds = seconds
|
self.test_seconds = seconds
|
||||||
self.test_songs = []
|
self.test_songs = []
|
||||||
|
|
||||||
print "test_seconds", self.test_seconds
|
print("test_seconds", self.test_seconds)
|
||||||
|
|
||||||
self.test_files = [
|
self.test_files = [
|
||||||
f for f in os.listdir(self.test_folder)
|
f for f in os.listdir(self.test_folder)
|
||||||
if os.path.isfile(os.path.join(self.test_folder, f))
|
if os.path.isfile(os.path.join(self.test_folder, f))
|
||||||
and re.findall("[0-9]*sec", f)[0] in self.test_seconds]
|
and re.findall("[0-9]*sec", f)[0] in self.test_seconds]
|
||||||
|
|
||||||
print "test_files", self.test_files
|
print("test_files", self.test_files)
|
||||||
|
|
||||||
self.n_columns = len(self.test_seconds)
|
self.n_columns = len(self.test_seconds)
|
||||||
self.n_lines = int(len(self.test_files) / self.n_columns)
|
self.n_lines = int(len(self.test_files) / self.n_columns)
|
||||||
|
|
||||||
print "columns:", self.n_columns
|
print("columns:", self.n_columns)
|
||||||
print "length of test files:", len(self.test_files)
|
print("length of test files:", len(self.test_files))
|
||||||
print "lines:", self.n_lines
|
print("lines:", self.n_lines)
|
||||||
|
|
||||||
# variable match results (yes, no, invalid)
|
# variable match results (yes, no, invalid)
|
||||||
self.result_match = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)]
|
self.result_match = [[0 for x in range(self.n_columns)] for x in range(self.n_lines)]
|
||||||
|
|
||||||
print "result_match matrix:", self.result_match
|
print("result_match matrix:", self.result_match)
|
||||||
|
|
||||||
# variable match precision (if matched in the corrected time)
|
# variable match precision (if matched in the corrected time)
|
||||||
self.result_matching_times = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)]
|
self.result_matching_times = [[0 for x in range(self.n_columns)] for x in range(self.n_lines)]
|
||||||
|
|
||||||
# variable mahing time (query time)
|
# variable mahing time (query time)
|
||||||
self.result_query_duration = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)]
|
self.result_query_duration = [[0 for x in range(self.n_columns)] for x in range(self.n_lines)]
|
||||||
|
|
||||||
# variable confidence
|
# variable confidence
|
||||||
self.result_match_confidence = [[0 for x in xrange(self.n_columns)] for x in xrange(self.n_lines)]
|
self.result_match_confidence = [[0 for x in range(self.n_columns)] for x in range(self.n_lines)]
|
||||||
|
|
||||||
self.begin()
|
self.begin()
|
||||||
|
|
||||||
|
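For reference, a tiny sketch of the n_lines x n_columns result matrices built above, now that xrange() has been replaced by Python 3's range(); the dimensions are illustrative.

n_columns = 3   # e.g. number of tested clip lengths
n_lines = 4     # e.g. number of test songs
result_match = [[0 for x in range(n_columns)] for x in range(n_lines)]
print(result_match)   # four rows of [0, 0, 0]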
@@ -178,19 +188,17 @@ class DejavuTest(object):
|
||||||
|
|
||||||
# add some
|
# add some
|
||||||
ax.set_ylabel(name)
|
ax.set_ylabel(name)
|
||||||
ax.set_title("%s %s Results" % (self.test_seconds[sec], name))
|
ax.set_title(f"{self.test_seconds[sec]} {name} Results")
|
||||||
ax.set_xticks(ind + width)
|
ax.set_xticks(ind + width)
|
||||||
|
|
||||||
labels = [0 for x in range(0, self.n_lines)]
|
labels = [0 for x in range(0, self.n_lines)]
|
||||||
for x in range(0, self.n_lines):
|
for x in range(0, self.n_lines):
|
||||||
labels[x] = "song %s" % (x+1)
|
labels[x] = f"song {x+1}"
|
||||||
ax.set_xticklabels(labels)
|
ax.set_xticklabels(labels)
|
||||||
|
|
||||||
box = ax.get_position()
|
box = ax.get_position()
|
||||||
ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
|
ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
|
||||||
|
|
||||||
#ax.legend( (rects1[0]), ('Dejavu'), loc='center left', bbox_to_anchor=(1, 0.5))
|
|
||||||
|
|
||||||
if name == 'Confidence':
|
if name == 'Confidence':
|
||||||
autolabel(rects1, ax)
|
autolabel(rects1, ax)
|
||||||
else:
|
else:
|
||||||
|
@@ -198,13 +206,13 @@ class DejavuTest(object):
|
||||||
|
|
||||||
plt.grid()
|
plt.grid()
|
||||||
|
|
||||||
fig_name = os.path.join(results_folder, "%s_%s.png" % (name, self.test_seconds[sec]))
|
fig_name = os.path.join(results_folder, f"{name}_{self.test_seconds[sec]}.png")
|
||||||
fig.savefig(fig_name)
|
fig.savefig(fig_name)
|
||||||
|
|
||||||
def begin(self):
|
def begin(self):
|
||||||
for f in self.test_files:
|
for f in self.test_files:
|
||||||
log_msg('--------------------------------------------------')
|
log_msg('--------------------------------------------------')
|
||||||
log_msg('file: %s' % f)
|
log_msg(f'file: {f}')
|
||||||
|
|
||||||
# get column
|
# get column
|
||||||
col = self.get_column_id(re.findall("[0-9]*sec", f)[0])
|
col = self.get_column_id(re.findall("[0-9]*sec", f)[0])
|
||||||
|
@@ -235,8 +243,8 @@ class DejavuTest(object):
|
||||||
# which song did we predict?
|
# which song did we predict?
|
||||||
result = ast.literal_eval(result)
|
result = ast.literal_eval(result)
|
||||||
song_result = result["song_name"]
|
song_result = result["song_name"]
|
||||||
log_msg('song: %s' % song)
|
log_msg(f'song: {song}')
|
||||||
log_msg('song_result: %s' % song_result)
|
log_msg(f'song_result: {song_result}')
|
||||||
|
|
||||||
if song_result != song:
|
if song_result != song:
|
||||||
log_msg('invalid match')
|
log_msg('invalid match')
|
||||||
|
@@ -246,31 +254,28 @@ class DejavuTest(object):
|
||||||
self.result_match_confidence[line][col] = 0
|
self.result_match_confidence[line][col] = 0
|
||||||
else:
|
else:
|
||||||
log_msg('correct match')
|
log_msg('correct match')
|
||||||
print self.result_match
|
print(self.result_match)
|
||||||
self.result_match[line][col] = 'yes'
|
self.result_match[line][col] = 'yes'
|
||||||
self.result_query_duration[line][col] = round(result[Dejavu.MATCH_TIME],3)
|
self.result_query_duration[line][col] = round(result[Dejavu.MATCH_TIME],3)
|
||||||
self.result_match_confidence[line][col] = result[Dejavu.CONFIDENCE]
|
self.result_match_confidence[line][col] = result[Dejavu.CONFIDENCE]
|
||||||
|
|
||||||
song_start_time = re.findall("\_[^\_]+",f)
|
song_start_time = re.findall("_[^_]+", f)
|
||||||
song_start_time = song_start_time[0].lstrip("_ ")
|
song_start_time = song_start_time[0].lstrip("_ ")
|
||||||
|
|
||||||
result_start_time = round((result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE *
|
result_start_time = round((result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE *
|
||||||
DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS), 0)
|
DEFAULT_OVERLAP_RATIO) / DEFAULT_FS, 0)
|
||||||
|
|
||||||
self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time)
|
self.result_matching_times[line][col] = int(result_start_time) - int(song_start_time)
|
||||||
if (abs(self.result_matching_times[line][col]) == 1):
|
if abs(self.result_matching_times[line][col]) == 1:
|
||||||
self.result_matching_times[line][col] = 0
|
self.result_matching_times[line][col] = 0
|
||||||
|
|
||||||
log_msg('query duration: %s' % round(result[Dejavu.MATCH_TIME],3))
|
log_msg(f'query duration: {round(result[Dejavu.MATCH_TIME], 3)}')
|
||||||
log_msg('confidence: %s' % result[Dejavu.CONFIDENCE])
|
log_msg(f'confidence: {result[Dejavu.CONFIDENCE]}')
|
||||||
log_msg('song start_time: %s' % song_start_time)
|
log_msg(f'song start_time: {song_start_time}')
|
||||||
log_msg('result start time: %s' % result_start_time)
|
log_msg(f'result start time: {result_start_time}')
|
||||||
if (self.result_matching_times[line][col] == 0):
|
|
||||||
|
if self.result_matching_times[line][col] == 0:
|
||||||
log_msg('accurate match')
|
log_msg('accurate match')
|
||||||
else:
|
else:
|
||||||
log_msg('inaccurate match')
|
log_msg('inaccurate match')
|
||||||
log_msg('--------------------------------------------------\n')
|
log_msg('--------------------------------------------------\n')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
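To make the start-time conversion above concrete, here is a worked example of turning a fingerprint offset (measured in FFT windows) into seconds; the window size, overlap ratio, and sample rate are assumed defaults for the sketch, not necessarily the project's configured values.

DEFAULT_WINDOW_SIZE = 4096     # assumed FFT window size (samples)
DEFAULT_OVERLAP_RATIO = 0.5    # assumed overlap ratio between windows
DEFAULT_FS = 44100             # assumed sample rate (Hz)

offset = 100                   # example fingerprint offset (in windows)
result_start_time = round(
    (offset * DEFAULT_WINDOW_SIZE * DEFAULT_OVERLAP_RATIO) / DEFAULT_FS, 0)
print(result_start_time)       # -> 5.0 seconds into the original track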
@@ -5,6 +5,7 @@
|
||||||
# Github: github.com/WarrenWeckesser/wavio
|
# Github: github.com/WarrenWeckesser/wavio
|
||||||
|
|
||||||
import wave as _wave
|
import wave as _wave
|
||||||
|
|
||||||
import numpy as _np
|
import numpy as _np
|
||||||
|
|
||||||
|
|
||||||
|
|
42
example.py
|
@@ -1,35 +1,37 @@
|
||||||
import warnings
|
|
||||||
import json
|
import json
|
||||||
warnings.filterwarnings("ignore")
|
import warnings
|
||||||
|
|
||||||
from dejavu import Dejavu
|
from dejavu import Dejavu
|
||||||
from dejavu.recognize import FileRecognizer, MicrophoneRecognizer
|
from dejavu.recognize import FileRecognizer, MicrophoneRecognizer
|
||||||
|
|
||||||
|
warnings.filterwarnings("ignore")
|
||||||
|
|
||||||
|
|
||||||
# load config from a JSON file (or anything outputting a python dictionary)
|
# load config from a JSON file (or anything outputting a python dictionary)
|
||||||
with open("dejavu.cnf.SAMPLE") as f:
|
with open("dejavu.cnf.SAMPLE") as f:
|
||||||
config = json.load(f)
|
config = json.load(f)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
# create a Dejavu instance
|
# create a Dejavu instance
|
||||||
djv = Dejavu(config)
|
djv = Dejavu(config)
|
||||||
|
|
||||||
# Fingerprint all the mp3's in the directory we give it
|
# Fingerprint all the mp3's in the directory we give it
|
||||||
djv.fingerprint_directory("mp3", [".mp3"])
|
djv.fingerprint_directory("mp3", [".mp3"])
|
||||||
|
|
||||||
# Recognize audio from a file
|
# Recognize audio from a file
|
||||||
song = djv.recognize(FileRecognizer, "mp3/Sean-Fournier--Falling-For-You.mp3")
|
song = djv.recognize(FileRecognizer, "mp3/Sean-Fournier--Falling-For-You.mp3")
|
||||||
print "From file we recognized: %s\n" % song
|
print(f"From file we recognized: {song}\n")
|
||||||
|
|
||||||
# Or recognize audio from your microphone for `secs` seconds
|
# Or recognize audio from your microphone for `secs` seconds
|
||||||
secs = 5
|
secs = 5
|
||||||
song = djv.recognize(MicrophoneRecognizer, seconds=secs)
|
song = djv.recognize(MicrophoneRecognizer, seconds=secs)
|
||||||
if song is None:
|
if song is None:
|
||||||
print "Nothing recognized -- did you play the song out loud so your mic could hear it? :)"
|
print("Nothing recognized -- did you play the song out loud so your mic could hear it? :)")
|
||||||
else:
|
else:
|
||||||
print "From mic with %d seconds we recognized: %s\n" % (secs, song)
|
print(f"From mic with %d seconds we recognized: {(secs, song)}\n")
|
||||||
|
|
||||||
# Or use a recognizer without the shortcut, in any way you would like
|
# Or use a recognizer without the shortcut, in any way you would like
|
||||||
recognizer = FileRecognizer(djv)
|
recognizer = FileRecognizer(djv)
|
||||||
song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3")
|
song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3")
|
||||||
print "No shortcut, we recognized: %s\n" % song
|
print(f"No shortcut, we recognized: {song}\n")
|
||||||
|
|
|
@@ -1,9 +1,7 @@
|
||||||
# requirements file
|
pydub==0.23.1
|
||||||
|
PyAudio==0.2.11
|
||||||
|
numpy==1.17.2
|
||||||
|
scipy==1.3.1
|
||||||
|
matplotlib==3.1.1
|
||||||
|
mysql-connector-python==8.0.17
|
||||||
|
|
||||||
### BEGIN ###
|
|
||||||
pydub>=0.9.4
|
|
||||||
PyAudio>=0.2.7
|
|
||||||
numpy>=1.8.2
|
|
||||||
scipy>=0.12.1
|
|
||||||
matplotlib>=1.3.1
|
|
||||||
### END ###
|
|
||||||
|
|
|
@@ -86,10 +86,10 @@ tests = 1 # djv
|
||||||
n_secs = len(test_seconds)
|
n_secs = len(test_seconds)
|
||||||
|
|
||||||
# set result variables -> 4d variables
|
# set result variables -> 4d variables
|
||||||
all_match_counter = [[[0 for x in xrange(tests)] for x in xrange(3)] for x in xrange(n_secs)]
|
all_match_counter = [[[0 for x in range(tests)] for x in range(3)] for x in range(n_secs)]
|
||||||
all_matching_times_counter = [[[0 for x in xrange(tests)] for x in xrange(2)] for x in xrange(n_secs)]
|
all_matching_times_counter = [[[0 for x in range(tests)] for x in range(2)] for x in range(n_secs)]
|
||||||
all_query_duration = [[[0 for x in xrange(tests)] for x in xrange(djv.n_lines)] for x in xrange(n_secs)]
|
all_query_duration = [[[0 for x in range(tests)] for x in range(djv.n_lines)] for x in range(n_secs)]
|
||||||
all_match_confidence = [[[0 for x in xrange(tests)] for x in xrange(djv.n_lines)] for x in xrange(n_secs)]
|
all_match_confidence = [[[0 for x in range(tests)] for x in range(djv.n_lines)] for x in range(n_secs)]
|
||||||
|
|
||||||
# group results by seconds
|
# group results by seconds
|
||||||
for line in range(0, djv.n_lines):
|
for line in range(0, djv.n_lines):
|
||||||
|
|
6
setup.py
|
@@ -7,11 +7,11 @@ def parse_requirements(requirements):
|
||||||
with open(requirements) as f:
|
with open(requirements) as f:
|
||||||
lines = [l for l in f]
|
lines = [l for l in f]
|
||||||
# remove spaces
|
# remove spaces
|
||||||
stripped = map((lambda x: x.strip()), lines)
|
stripped = list(map((lambda x: x.strip()), lines))
|
||||||
# remove comments
|
# remove comments
|
||||||
nocomments = filter((lambda x: not x.startswith('#')), stripped)
|
nocomments = list(filter((lambda x: not x.startswith('#')), stripped))
|
||||||
# remove empty lines
|
# remove empty lines
|
||||||
reqs = filter((lambda x: x), nocomments)
|
reqs = list(filter((lambda x: x), nocomments))
|
||||||
return reqs
|
return reqs
|
||||||
|
|
||||||
PACKAGE_NAME = "PyDejavu"
|
PACKAGE_NAME = "PyDejavu"
|
||||||
|
|
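A short sketch of what parse_requirements() returns once map() and filter() are wrapped in list() for Python 3: comments and blank lines are dropped and whitespace is stripped. The sample lines are illustrative.

def parse_requirements_sketch(lines):
    stripped = list(map(lambda x: x.strip(), lines))                   # remove spaces
    nocomments = list(filter(lambda x: not x.startswith('#'), stripped))  # remove comments
    reqs = list(filter(lambda x: x, nocomments))                       # remove empty lines
    return reqs

sample = ["# requirements file\n", "pydub==0.23.1\n", "\n", "numpy==1.17.2\n"]
print(parse_requirements_sketch(sample))   # -> ['pydub==0.23.1', 'numpy==1.17.2']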