Fixes #17 by adding configuration parameter for fingerprinting seconds, preserves this in FileRecognizer, and documents this in the README

This commit is contained in:
worldveil 2014-01-07 23:25:55 -05:00
parent 9eca3cc05a
commit cd7adc1485
4 changed files with 42 additions and 6 deletions

View file

@ -70,6 +70,36 @@ You'll have a lot of fingerprints once it completes a large folder of mp3s:
Also, any subsequent calls to `fingerprint_file` or `fingerprint_directory` will fingerprint and add those songs to the database as well. It's meant to simulate a system where as new songs are released, they are fingerprinted and added to the database seamlessly without stopping the system.
## Configuration options
The configuration object to the Dejavu constructor must be a dictionary.
The following keys are mandatory:
* `database`: a dictionary whose keys are those accepted by the database you are using. For example, with MySQL, the keys can be anything that the [`MySQLdb.connect()`](http://mysql-python.sourceforge.net/MySQLdb.html) function will accept.
The following keys are optional:
* `fingerprint_limit`: allows you to control how many seconds of each audio file to fingerprint. Leaving out this key, or alternatively using `-1` or `None`, will cause Dejavu to fingerprint the entire audio file. Default value is `None`.
* `database_type`: as of now, only `mysql` (the default value) is supported. If you'd like to subclass `Database` and add another, please fork and send a pull request!
An example configuration is as follows:
```python
>>> from dejavu import Dejavu
>>> config = {
... "database": {
... "host": "127.0.0.1",
... "user": "root",
... "passwd": "Password123",
... "db": "dejavu_db",
... },
... "database_type" : "mysql",
... "fingerprint_limit" : 10
... }
>>> djv = Dejavu(config)
```
## Recognizing
There are two ways to recognize audio using Dejavu. You can use Dejavu interactively through the terminal. Assuming you've already instantiated a Dejavu object, you can match audio through your computer's microphone:

View file

@ -17,6 +17,12 @@ class Dejavu(object):
self.db = db_cls(**config.get("database", {}))
self.db.setup()
# if we should limit seconds fingerprinted,
# None|-1 means use entire track
self.limit = self.config.get("fingerprint_limit", None)
if self.limit == -1: # for JSON compatibility
self.limit = None
# get songs previously indexed
# TODO: should probably use a checksum of the file instead of filename
self.songs = self.db.get_songs()
@ -46,7 +52,7 @@ class Dejavu(object):
continue
result = pool.apply_async(_fingerprint_worker,
(filename, self.db))
(filename, self.db, self.limit))
results.append(result)
while len(results):
@ -134,10 +140,10 @@ class Dejavu(object):
return r.recognize(*options, **kwoptions)
def _fingerprint_worker(filename, db):
def _fingerprint_worker(filename, db, limit):
song_name, extension = os.path.splitext(os.path.basename(filename))
channels, Fs = decoder.read(filename)
channels, Fs = decoder.read(filename, limit)
# insert song into database
sid = db.insert_song(song_name)

View file

@ -26,7 +26,7 @@ class FileRecognizer(BaseRecognizer):
super(FileRecognizer, self).__init__(dejavu)
def recognize_file(self, filename):
frames, self.Fs = decoder.read(filename)
frames, self.Fs = decoder.read(filename, self.dejavu.limit)
t = time.time()
match = self._recognize(*frames)

4
go.py
View file

@ -10,7 +10,7 @@ with open("dejavu.cnf") as f:
# create a Dejavu instance
djv = Dejavu(config)
# Fingerprint all the mp3's in the directory we give it
djv.fingerprint_directory("va_us_top_40/mp3", [".mp3"])
djv.fingerprint_directory("mp3", [".mp3"])
# Recognize audio from a file
from dejavu.recognize import FileRecognizer
@ -23,4 +23,4 @@ song = djv.recognize(MicrophoneRecognizer, seconds=2)
# Or use a recognizer without the shortcut, in any way you would like
from dejavu.recognize import FileRecognizer
recognizer = FileRecognizer(djv)
song = recognizer.recognize_file("va_us_top_40/wav/17_-_#Beautiful_-_Mariah_Carey_ft.wav")
song = recognizer.recognize_file("mp3/sail.mp3")