From efe3a33915b1a0c49fe9311b449cfa549bbd0f9b Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Fri, 28 Nov 2014 03:26:25 +0530
Subject: [PATCH 01/11] Added setup.py

---
 requirements.txt |  9 +++++++++
 setup.py         | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 requirements.txt
 create mode 100644 setup.py

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8b3eea8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+# requirements file
+
+### BEGIN ###
+pydub==0.9.4
+PyAudio==0.2.8
+numpy==1.8.2
+scipy==0.12.1
+matplotlib==1.3.1
+### END ###
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..39df5a4
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,44 @@
+from setuptools import setup, find_packages
+# import os, sys
+
+
+def parse_requirements(requirements):
+    # load from requirements.txt
+    with open(requirements) as f:
+        lines = [l for l in f]
+        # remove spaces
+        stripped = map((lambda x: x.strip()), lines)
+        # remove comments
+        nocomments = filter((lambda x: not x.startswith('#')), stripped)
+        # remove empty lines
+        reqs = filter((lambda x: x), nocomments)
+        return reqs
+
+PACKAGE_NAME = "dejavu"
+PACKAGE_VERSION = "0.1"
+SUMMARY = 'Dejavu Audio Fingerprinting'
+DESCRIPTION = """Dejavu Audio Fingerprinting"""
+REQUIREMENTS = parse_requirements("requirements.txt")
+
+setup(
+    name=PACKAGE_NAME,
+    version=PACKAGE_VERSION,
+    description=SUMMARY,
+    long_description=DESCRIPTION,
+    author='worldveil',
+    author_email='will.drevo@gmail.com',
+    url='http://github.com/tuxdna/dejavu',
+    license='Apache 2.0',
+    include_package_data=True,
+    packages=find_packages(),
+    platforms=['Any'],
+    install_requires=REQUIREMENTS,
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Environment :: Console',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: Apache Software License',
+        'Operating System :: OS Independent',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+)

From 4d2b91b0526caf6a9253963c995a8250549fbb2e Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Fri, 28 Nov 2014 03:58:52 +0530
Subject: [PATCH 02/11] Added tuning section to README.md. Removed trailing
 space from fingerprint.py

---
 README.md             | 13 +++++++++++++
 dejavu/fingerprint.py | 22 +++++++++++-----------
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 2062adb..8b4f9b4 100755
--- a/README.md
+++ b/README.md
@@ -100,6 +100,19 @@ An example configuration is as follows:
 >>> djv = Dejavu(config)
 ```
 
+## Tuning
+
+Inside `fingerprint.py`, you may want to adjust the following parameters (some example values are given below).
+
+    FINGERPRINT_REDUCTION = 30
+    PEAK_SORT = False
+    DEFAULT_OVERLAP_RATIO = 0.4
+    DEFAULT_FAN_VALUE = 10
+    DEFAULT_AMP_MIN = 15
+    PEAK_NEIGHBORHOOD_SIZE = 30
+    
+These parameters are described in detail in `fingerprint.py`. Read those descriptions in order to understand the impact of changing these values.
+
 ## Recognizing
 
 There are two ways to recognize audio using Dejavu. You can recognize by reading and processing files on disk, or through your computer's microphone.
diff --git a/dejavu/fingerprint.py b/dejavu/fingerprint.py
index a8a5aca..95c0076 100755
--- a/dejavu/fingerprint.py
+++ b/dejavu/fingerprint.py
@@ -12,8 +12,8 @@ IDX_TIME_J = 1
 
 ######################################################################
 # Sampling rate, related to the Nyquist conditions, which affects
-# the range frequencies we can detect. 
-DEFAULT_FS = 44100 
+# the range frequencies we can detect.
+DEFAULT_FS = 44100
 
 ######################################################################
 # Size of the FFT window, affects frequency granularity
@@ -23,15 +23,15 @@ DEFAULT_WINDOW_SIZE = 4096
 # Ratio by which each sequential window overlaps the last and the
 # next window. Higher overlap will allow a higher granularity of offset
 # matching, but potentially more fingerprints.
-DEFAULT_OVERLAP_RATIO = 0.5  
+DEFAULT_OVERLAP_RATIO = 0.5
 
 ######################################################################
 # Degree to which a fingerprint can be paired with its neighbors --
-# higher will cause more fingerprints, but potentially better accuracy. 
-DEFAULT_FAN_VALUE = 15 
+# higher will cause more fingerprints, but potentially better accuracy.
+DEFAULT_FAN_VALUE = 15
 
 ######################################################################
-# Minimum amplitude in spectrogram in order to be considered a peak. 
+# Minimum amplitude in spectrogram in order to be considered a peak.
 # This can be raised to reduce number of fingerprints, but can negatively
 # affect accuracy.
 DEFAULT_AMP_MIN = 10
@@ -39,13 +39,13 @@ DEFAULT_AMP_MIN = 10
 ######################################################################
 # Number of cells around an amplitude peak in the spectrogram in order
 # for Dejavu to consider it a spectral peak. Higher values mean less
-# fingerprints and faster matching, but can potentially affect accuracy. 
+# fingerprints and faster matching, but can potentially affect accuracy.
 PEAK_NEIGHBORHOOD_SIZE = 20
 
 ######################################################################
-# Thresholds on how close or far fingerprints can be in time in order 
+# Thresholds on how close or far fingerprints can be in time in order
 # to be paired as a fingerprint. If your max is too low, higher values of
-# DEFAULT_FAN_VALUE may not perform as expected. 
+# DEFAULT_FAN_VALUE may not perform as expected.
 MIN_HASH_TIME_DELTA = 0
 MAX_HASH_TIME_DELTA = 200
 
@@ -56,7 +56,7 @@ MAX_HASH_TIME_DELTA = 200
 PEAK_SORT = True
 
 ######################################################################
-# Number of bits to throw away from the front of the SHA1 hash in the 
+# Number of bits to throw away from the front of the SHA1 hash in the
 # fingerprint calculation. The more you throw away, the less storage, but
 # potentially higher collisions and misclassifications when identifying songs.
 FINGERPRINT_REDUCTION = 20
@@ -137,7 +137,7 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
     [(e05b341a9b77a51fd26, 32), ... ]
     """
     fingerprinted = set()  # to avoid rehashing same pairs
-    
+
     if PEAK_SORT:
         peaks.sort(key=itemgetter(1))
 

From 150248cbcdcc114f996ef6746e74f8cd8294190f Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Fri, 28 Nov 2014 04:06:12 +0530
Subject: [PATCH 03/11] Update project name from dejavu to PyDejavu

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 39df5a4..4afd9a6 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ def parse_requirements(requirements):
         reqs = filter((lambda x: x), nocomments)
         return reqs
 
-PACKAGE_NAME = "dejavu"
+PACKAGE_NAME = "PyDejavu"
 PACKAGE_VERSION = "0.1"
 SUMMARY = 'Dejavu Audio Fingerprinting'
 DESCRIPTION = """Dejavu Audio Fingerprinting"""

From 406187bd1a7fe9490b5f5d996501af3f4df85d3c Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Fri, 28 Nov 2014 04:34:14 +0530
Subject: [PATCH 04/11] Added MANIFEST.in file

---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 MANIFEST.in

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..f9bd145
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include requirements.txt

From 2972dce6eb5e4d94f109b3844ef7ddc6276262ee Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Fri, 28 Nov 2014 05:20:29 +0530
Subject: [PATCH 05/11] Arguments parser CLI, with all existing tests passed.

---
 dejavu.py         | 141 ++++++++++++++++++++++------------------------
 dejavu/testing.py |  14 +++--
 run_tests.py      |  28 ++++-----
 setup.py          |   2 +-
 test_dejavu.sh    |   4 +-
 5 files changed, 94 insertions(+), 95 deletions(-)

diff --git a/dejavu.py b/dejavu.py
index e2b34a1..a50c9da 100755
--- a/dejavu.py
+++ b/dejavu.py
@@ -3,96 +3,89 @@
 import sys
 import json
 import warnings
+import argparse
 
 from dejavu import Dejavu
 from dejavu.recognize import FileRecognizer
 from dejavu.recognize import MicrophoneRecognizer
-from dejavu.recognize import FileRecognizer
+from argparse import RawTextHelpFormatter
 
 warnings.filterwarnings("ignore")
 
-def init():
-    # load config from a JSON file (or anything outputting a python dictionary)
-    with open("dejavu.cnf") as f:
-        config = json.load(f)
+DEFAULT_CONFIG_FILE = "dejavu.cnf"
+
+
+def init(config_file):
+    """ Load config from a JSON file
+    or anything outputting a python dictionary
+    """
+    try:
+        with open(config_file) as f:
+            config = json.load(f)
+    except IOError as err:
+        print("Cannot open configuration: %s. Exiting" % (str(err)))
+        sys.exit(1)
 
     # create a Dejavu instance
     return Dejavu(config)
 
-def showHelp():
-    print ""
-    print "------------------------------------------------"
-    print "DejaVu audio fingerprinting and recognition tool"
-    print "------------------------------------------------"
-    print ""
-    print "Usage: dejavu.py [command] [arguments]"
-    print ""
-    print "Available commands:"
-    print ""
-    print "  Fingerprint a file"
-    print "    dejavu.py fingerprint /path/to/file.extension"
-    print ""
-    print "  Fingerprint all files in a directory"
-    print "    dejavu.py fingerprint /path/to/directory extension"
-    print ""
-    print "  Recognize what is playing through the microphone"
-    print "    dejavu.py recognize mic number_of_seconds"
-    print ""
-    print "  Recognize a file by listening to it"
-    print "    dejavu.py recognize file /path/to/file"
-    print ""
-    print "  Display this help screen"
-    print "    dejavu.py help"
-    print ""
-    exit
 
-if len(sys.argv) > 1:
-    command = sys.argv[1]
-else:
-    showHelp()
+if __name__ == '__main__':
+    """ If running from terminal.
+    """
+    parser = argparse.ArgumentParser(
+        description="Audio Fingerprinting library",
+        formatter_class=RawTextHelpFormatter)
+    parser.add_argument('-c', '--config', nargs='?',
+                        help='Path to configuration file\n'
+                             'Usages: \n'
+                             '--config /path/to/congfile\n')
+    parser.add_argument('-f', '--fingerprint', nargs='*',
+                        help='Fingerprint files in a directory\n'
+                             'Usages: \n'
+                             '--fingerprint /path/to/directory extension\n'
+                             '--fingerprint /path/to/directory')
+    parser.add_argument('-r', '--recognize', nargs=2,
+                        help='Recognize what is '
+                             'playing through the microphone\n'
+                             'Usage: \n'
+                             '--recognize mic number_of_seconds \n'
+                             '--recognize file path/to/file \n')
+    args = parser.parse_args()
 
-if command == 'fingerprint': # Fingerprint all files in a directory
+    if not args.fingerprint and not args.recognize:
+        print("No arguments")
+        sys.exit(0)
 
-    djv = init()
-    
+    config_file = args.config
+    if config_file is None:
+        config_file = DEFAULT_CONFIG_FILE
+        # print "Using default config file: %s" % (config_file)
 
-    if len(sys.argv) == 4:
+    djv = init(config_file)
+    if args.fingerprint:
+        # Fingerprint all files in a directory
+        if 2 == len(args.fingerprint):
+            directory = args.fingerprint[0]
+            extension = args.fingerprint[1]
+            print("Fingerprinting all .%s files in the %s directory"
+                  % (extension, directory))
+            djv.fingerprint_directory(directory, ["." + extension], 4)
 
-        directory = sys.argv[2]
-        extension = sys.argv[3]
-        print "Fingerprinting all .%s files in the %s directory" % (extension, directory)
+        elif 1 == len(args.fingerprint):
+            filepath = args.fingerprint[0]
+            djv.fingerprint_file(filepath)
 
-        djv.fingerprint_directory(directory, ["." + extension], 4)
+    elif args.recognize:
+        # Recognize audio source
+        song = None
+        source = args.recognize[0]
+        opt_arg = args.recognize[1]
 
-    else:
-
-        filepath = sys.argv[2]
-        djv.fingerprint_file(filepath)
-
-elif command == 'recognize': # Recognize audio
-
-    source = sys.argv[2]
-    song = None
-
-    if source in ['mic', 'microphone']:
-
-        seconds = int(sys.argv[3])
-        djv = init()
-        song = djv.recognize(MicrophoneRecognizer, seconds=seconds)
-
-    elif source == 'file':
-
-        djv = init()
-        sourceFile = sys.argv[3]
-        song = djv.recognize(FileRecognizer, sourceFile)
-
-    else:
-
-        showHelp()
-
-    print song
-
-else:
-
-    showHelp()
+        if source in ('mic', 'microphone'):
+            song = djv.recognize(MicrophoneRecognizer, seconds=opt_arg)
+        elif source == 'file':
+            song = djv.recognize(FileRecognizer, opt_arg)
+        print(song)
 
+    sys.exit(0)
diff --git a/dejavu/testing.py b/dejavu/testing.py
index d15c12f..d2a3b48 100644
--- a/dejavu/testing.py
+++ b/dejavu/testing.py
@@ -207,10 +207,16 @@ class DejavuTest(object):
             log_msg('file: %s' % f)
 
             # get column 
-            col = self.get_column_id(re.findall("[0-9]*sec",f)[0])
-            song = path_to_songname(f).split("_")[0]  # format: XXXX_offset_length.mp3
-            line = self.get_line_id (song)
-            result = subprocess.check_output(["python", "dejavu.py", 'recognize', 'file', self.test_folder + "/" + f])
+            col = self.get_column_id(re.findall("[0-9]*sec", f)[0])
+            # format: XXXX_offset_length.mp3
+            song = path_to_songname(f).split("_")[0]  
+            line = self.get_line_id(song)
+            result = subprocess.check_output([
+                "python", 
+                "dejavu.py",
+                '-r',
+                'file', 
+                self.test_folder + "/" + f])
 
             if result.strip() == "None":
                 log_msg('No match')
diff --git a/run_tests.py b/run_tests.py
index 6c9ae66..b0dfde9 100644
--- a/run_tests.py
+++ b/run_tests.py
@@ -8,28 +8,28 @@ import shutil
 usage = "usage: %prog [options] TESTING_AUDIOFOLDER"
 parser = OptionParser(usage=usage, version="%prog 1.1")
 parser.add_option("--secs",
-				  action="store",
+                  action="store",
                   dest="secs",
                   default=5,
                   type=int,
                   help='Number of seconds starting from zero to test')
 parser.add_option("--results",
-				  action="store",
+                  action="store",
                   dest="results_folder",
                   default="./dejavu_test_results",
                   help='Sets the path where the results are saved')
 parser.add_option("--temp",
-				  action="store",
+                  action="store",
                   dest="temp_folder",
                   default="./dejavu_temp_testing_files",
                   help='Sets the path where the temp files are saved')
 parser.add_option("--log",
-				  action="store_true",
+                  action="store_true",
                   dest="log",
                   default=True,
                   help='Enables logging')
 parser.add_option("--silent",
-				  action="store_false",
+                  action="store_false",
                   dest="silent",
                   default=False,
                   help='Disables printing')
@@ -38,13 +38,13 @@ parser.add_option("--log-file",
                   default="results-compare.log",
                   help='Set the path and filename of the log file')
 parser.add_option("--padding",
-				  action="store",
+                  action="store",
                   dest="padding",
                   default=10,
                   type=int,
                   help='Number of seconds to pad choice of place to test from')
 parser.add_option("--seed",
-				  action="store",
+                  action="store",
                   dest="seed",
                   default=None,
                   type=int,
@@ -62,27 +62,27 @@ except:
     os.mkdir(options.results_folder)
 
 # set logging 
-if options.log == True:
-	logging.basicConfig(filename=options.log_file, level=logging.DEBUG)
+if options.log:
+    logging.basicConfig(filename=options.log_file, level=logging.DEBUG)
 
 # set test seconds
 test_seconds = ['%dsec' % i for i in range(1, options.secs + 1, 1)]
 
 # generate testing files
 for i in range(1, options.secs + 1, 1):
-	generate_test_files(test_folder, options.temp_folder, 
-		i, padding=options.padding)
+    generate_test_files(test_folder, options.temp_folder, 
+                        i, padding=options.padding)
 
 # scan files
 log_msg("Running Dejavu fingerprinter on files in %s..." % test_folder, 
-	log=options.log, silent=options.silent)
+        log=options.log, silent=options.silent)
 
 tm = time.time()
 djv = DejavuTest(options.temp_folder, test_seconds)
 log_msg("finished obtaining results from dejavu in %s" % (time.time() - tm),
-	log=options.log, silent=options.silent)
+        log=options.log, silent=options.silent)
 
-tests = 1 # djv
+tests = 1  # djv
 n_secs = len(test_seconds) 
 
 # set result variables -> 4d variables
diff --git a/setup.py b/setup.py
index 4afd9a6..f0b6def 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ def parse_requirements(requirements):
         return reqs
 
 PACKAGE_NAME = "PyDejavu"
-PACKAGE_VERSION = "0.1"
+PACKAGE_VERSION = "0.1.1"
 SUMMARY = 'Dejavu Audio Fingerprinting'
 DESCRIPTION = """Dejavu Audio Fingerprinting"""
 REQUIREMENTS = parse_requirements("requirements.txt")
diff --git a/test_dejavu.sh b/test_dejavu.sh
index 7ccdd93..6eecda2 100644
--- a/test_dejavu.sh
+++ b/test_dejavu.sh
@@ -8,7 +8,7 @@ rm -rf ./results ./temp_audio
 
 ###########
 # Fingerprint files of extension mp3 in the ./mp3 folder
-python dejavu.py fingerprint ./mp3/ mp3
+python dejavu.py -f ./mp3/ mp3
 
 ##########
 # Run a test suite on the ./mp3 folder by extracting 1, 2, 3, 4, and 5 
@@ -22,4 +22,4 @@ python run_tests.py \
 	--padding 8 \
 	--seed 42 \
 	--results ./results \
-	./mp3
\ No newline at end of file
+	./mp3

From 4aabea7814b6da8ab52eaad669e776fea6aa62f4 Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Fri, 28 Nov 2014 22:56:22 +0530
Subject: [PATCH 06/11] Refactor and update documentation for installation on
 Fedora 20+

---
 DEPENDENCIES.md    | 31 -----------------------
 INSTALLATION.md    | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 README.md          | 16 +++---------
 dejavu/__init__.py | 37 ++++++++++++++-------------
 4 files changed, 85 insertions(+), 62 deletions(-)
 delete mode 100755 DEPENDENCIES.md
 create mode 100644 INSTALLATION.md

diff --git a/DEPENDENCIES.md b/DEPENDENCIES.md
deleted file mode 100755
index a9259d7..0000000
--- a/DEPENDENCIES.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# Dependencies required by dejavu
-
-* [`pyaudio`](http://people.csail.mit.edu/hubert/pyaudio/)
-* [`ffmpeg`](https://github.com/FFmpeg/FFmpeg)
-* [`pydub`](http://pydub.com/)
-* [`numpy`](http://www.numpy.org/)
-* [`scipy`](http://www.scipy.org/)
-* [`matplotlib`](http://matplotlib.org/)
-* [`MySQLdb`](http://mysql-python.sourceforge.net/MySQLdb.html)
-
-## Dependency installation for Mac OS X
-
-Tested on OS X Mavericks. An option is to install [Homebrew](http://brew.sh) and do the following:
-
-```
-brew install portaudio
-brew install ffmpeg
-
-sudo easy_install pyaudio
-sudo easy_install pydub
-sudo easy_install numpy
-sudo easy_install scipy
-sudo easy_install matplotlib
-sudo easy_install pip
-
-sudo pip install MySQL-python
-
-sudo ln -s /usr/local/mysql/lib/libmysqlclient.18.dylib /usr/lib/libmysqlclient.18.dylib
-```
-
-However installing `portaudio` and/or `ffmpeg` from source is also doable. 
\ No newline at end of file
diff --git a/INSTALLATION.md b/INSTALLATION.md
new file mode 100644
index 0000000..33b429e
--- /dev/null
+++ b/INSTALLATION.md
@@ -0,0 +1,63 @@
+# Installation of dejavu
+
+So far dejavu has only been tested on Unix systems.
+
+* [`pyaudio`](http://people.csail.mit.edu/hubert/pyaudio/) for grabbing audio from microphone
+* [`ffmpeg`](https://github.com/FFmpeg/FFmpeg) for converting audio files to .wav format
+* [`pydub`](http://pydub.com/), a Python `ffmpeg` wrapper
+* [`numpy`](http://www.numpy.org/) for taking the FFT of audio signals
+* [`scipy`](http://www.scipy.org/), used in peak finding algorithms
+* [`matplotlib`](http://matplotlib.org/), used for spectrograms and plotting
+* [`MySQLdb`](http://mysql-python.sourceforge.net/MySQLdb.html) for interfacing with MySQL databases
+
+For installing `ffmpeg` on Mac OS X, I highly recommend [this post](http://jungels.net/articles/ffmpeg-howto.html).
+
+## Fedora 20+
+
+### Dependency installation for Mac OS X
+
+Install the dependencies
+
+    sudo yum install numpy scipy python-matplotlib ffmpeg portaudio-devel
+    pip install PyAudio
+    pip install pydub
+    
+Now set up virtualenv ([howto?](http://www.pythoncentral.io/how-to-install-virtualenv-python/))
+
+    pip install virtualenv
+    virtualenv --system-site-packages env_with_system
+
+Install from PyPI
+
+    source env_with_system/bin/activate
+    pip install PyDejavu
+
+
+You can also install the latest code from GitHub:
+
+    source env_with_system/bin/activate
+    pip install https://github.com/tuxdna/dejavu/zipball/master
+
+## Mac OS X
+
+### Dependency installation for Mac OS X
+
+Tested on OS X Mavericks. An option is to install [Homebrew](http://brew.sh) and do the following:
+
+```
+brew install portaudio
+brew install ffmpeg
+
+sudo easy_install pyaudio
+sudo easy_install pydub
+sudo easy_install numpy
+sudo easy_install scipy
+sudo easy_install matplotlib
+sudo easy_install pip
+
+sudo pip install MySQL-python
+
+sudo ln -s /usr/local/mysql/lib/libmysqlclient.18.dylib /usr/lib/libmysqlclient.18.dylib
+```
+
+However, installing `portaudio` and/or `ffmpeg` from source is also doable.
diff --git a/README.md b/README.md
index 8b4f9b4..e8ea2e5 100755
--- a/README.md
+++ b/README.md
@@ -6,19 +6,9 @@ Audio fingerprinting and recognition algorithm implemented in Python, see the ex
 
 Dejavu can memorize audio by listening to it once and fingerprinting it. Then by playing a song and recording microphone input, Dejavu attempts to match the audio against the fingerprints held in the database, returning the song being played. 
 
-## Dependencies:
+## Installation and Dependencies:
 
-I've only tested this on Unix systems.
-
-* [`pyaudio`](http://people.csail.mit.edu/hubert/pyaudio/) for grabbing audio from microphone
-* [`ffmpeg`](https://github.com/FFmpeg/FFmpeg) for converting audio files to .wav format
-* [`pydub`](http://pydub.com/), a Python `ffmpeg` wrapper
-* [`numpy`](http://www.numpy.org/) for taking the FFT of audio signals
-* [`scipy`](http://www.scipy.org/), used in peak finding algorithms
-* [`matplotlib`](http://matplotlib.org/), used for spectrograms and plotting
-* [`MySQLdb`](http://mysql-python.sourceforge.net/MySQLdb.html) for interfacing with MySQL databases
-
-For installing `ffmpeg` on Mac OS X, I highly recommend [this post](http://jungels.net/articles/ffmpeg-howto.html).
+Read [INSTALLATION.md](INSTALLATION.md)
 
 ## Setup
 
@@ -148,7 +138,7 @@ and with the command line script, you specify the number of seconds to listen:
 $ python dejavu.py recognize mic 10
 ```
 
-## Testing (New!)
+## Testing
 
 Testing out different parameterizations of the fingerprinting algorithm is often useful as the corpus becomes larger and larger, and inevitable tradeoffs between speed and accuracy come into play. 
 
diff --git a/dejavu/__init__.py b/dejavu/__init__.py
index 2eac959..66d9c92 100755
--- a/dejavu/__init__.py
+++ b/dejavu/__init__.py
@@ -3,6 +3,9 @@ import dejavu.decoder as decoder
 import fingerprint
 import multiprocessing
 import os
+import traceback
+import sys
+
 
 class Dejavu(object):
 
@@ -27,7 +30,7 @@ class Dejavu(object):
         # if we should limit seconds fingerprinted,
         # None|-1 means use entire track
         self.limit = self.config.get("fingerprint_limit", None)
-        if self.limit == -1: # for JSON compatibility
+        if self.limit == -1:  # for JSON compatibility
             self.limit = None
         self.get_fingerprinted_songs()
 
@@ -79,9 +82,7 @@ class Dejavu(object):
                 break
             except:
                 print("Failed fingerprinting")
-
                 # Print traceback because we can't reraise it here
-                import traceback, sys
                 traceback.print_exc(file=sys.stdout)
             else:
                 sid = self.db.insert_song(song_name)
@@ -94,13 +95,12 @@ class Dejavu(object):
         pool.join()
 
     def fingerprint_file(self, filepath, song_name=None):
-    	
-    	songname = decoder.path_to_songname(filepath)
-    	song_name = song_name or songname
-    	# don't refingerprint already fingerprinted files
+        songname = decoder.path_to_songname(filepath)
+        song_name = song_name or songname
+        # don't refingerprint already fingerprinted files
         if song_name in self.songnames_set:
             print "%s already fingerprinted, continuing..." % song_name
-       	else:
+        else:
             song_name, hashes = _fingerprint_worker(filepath,
                                                     self.limit,
                                                     song_name=song_name)
@@ -129,9 +129,9 @@ class Dejavu(object):
         song_id = -1
         for tup in matches:
             sid, diff = tup
-            if not diff in diff_counter:
+            if diff not in diff_counter:
                 diff_counter[diff] = {}
-            if not sid in diff_counter[diff]:
+            if sid not in diff_counter[diff]:
                 diff_counter[diff][sid] = 0
             diff_counter[diff][sid] += 1
 
@@ -149,15 +149,16 @@ class Dejavu(object):
             return None
 
         # return match info
-        nseconds = round(float(largest) / fingerprint.DEFAULT_FS * \
-                fingerprint.DEFAULT_WINDOW_SIZE * \
-                fingerprint.DEFAULT_OVERLAP_RATIO, 5)
+        nseconds = round(float(largest) / fingerprint.DEFAULT_FS *
+                         fingerprint.DEFAULT_WINDOW_SIZE *
+                         fingerprint.DEFAULT_OVERLAP_RATIO, 5)
         song = {
-            Dejavu.SONG_ID : song_id,
-            Dejavu.SONG_NAME : songname,
-            Dejavu.CONFIDENCE : largest_count,
-            Dejavu.OFFSET : largest,
-            Dejavu.OFFSET_SECS : nseconds }
+            Dejavu.SONG_ID: song_id,
+            Dejavu.SONG_NAME: songname,
+            Dejavu.CONFIDENCE: largest_count,
+            Dejavu.OFFSET: largest,
+            Dejavu.OFFSET_SECS: nseconds
+        }
 
         return song
 

From 275380bada8de33a3cf2a16aabf59dc527044e48 Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Sat, 29 Nov 2014 00:17:23 +0530
Subject: [PATCH 07/11] Update examples in README.md with updated CLI arguments

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 8b4f9b4..6b676ee 100755
--- a/README.md
+++ b/README.md
@@ -122,7 +122,7 @@ There are two ways to recognize audio using Dejavu. You can recognize by reading
 Through the terminal:
 
 ```bash
-$ python dejavu.py recognize file sometrack.wav 
+$ python dejavu.py --recognize file sometrack.wav 
 {'song_id': 1, 'song_name': 'Taylor Swift - Shake It Off', 'confidence': 3948, 'offset_seconds': 30.00018, 'match_time': 0.7159781455993652, 'offset': 646L}
 ```
 
@@ -145,7 +145,7 @@ With scripting:
 and with the command line script, you specify the number of seconds to listen:
 
 ```bash
-$ python dejavu.py recognize mic 10
+$ python dejavu.py --recognize mic 10
 ```
 
 ## Testing (New!)
@@ -176,7 +176,7 @@ rm -rf ./results ./temp_audio
 
 ###########
 # Fingerprint files of extension mp3 in the ./mp3 folder
-python dejavu.py fingerprint ./mp3/ mp3
+python dejavu.py --fingerprint ./mp3/ mp3
 
 ##########
 # Run a test suite on the ./mp3 folder by extracting 1, 2, 3, 4, and 5 

From 785a1db92d243c705581f34a61d8401f267b64d8 Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Tue, 2 Dec 2014 15:42:09 +0530
Subject: [PATCH 08/11] Bump version to 0.1.2

---
 requirements.txt |  2 +-
 setup.py         | 30 +++++++++++++++++++++++-------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 8b3eea8..6e13078 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@
 
 ### BEGIN ###
 pydub==0.9.4
-PyAudio==0.2.8
+PyAudio>=0.2.7
 numpy==1.8.2
 scipy==0.12.1
 matplotlib==1.3.1
diff --git a/setup.py b/setup.py
index f0b6def..cfb64ad 100644
--- a/setup.py
+++ b/setup.py
@@ -15,9 +15,22 @@ def parse_requirements(requirements):
         return reqs
 
 PACKAGE_NAME = "PyDejavu"
-PACKAGE_VERSION = "0.1.1"
-SUMMARY = 'Dejavu Audio Fingerprinting'
-DESCRIPTION = """Dejavu Audio Fingerprinting"""
+PACKAGE_VERSION = "0.1.2"
+SUMMARY = 'Dejavu: Audio Fingerprinting in Python'
+DESCRIPTION = """
+Audio fingerprinting and recognition algorithm implemented in Python
+
+See the explanation here: 
+
+`http://willdrevo.com/fingerprinting-and-audio-recognition-with-python/`__
+
+Dejavu can memorize recorded audio by listening to it once and fingerprinting 
+it. Then by playing a song and recording microphone input or on disk file, 
+Dejavu attempts to match the audio against the fingerprints held in the 
+database, returning the song or recording being played.
+
+__ http://willdrevo.com/fingerprinting-and-audio-recognition-with-python/
+"""
 REQUIREMENTS = parse_requirements("requirements.txt")
 
 setup(
@@ -25,20 +38,23 @@ setup(
     version=PACKAGE_VERSION,
     description=SUMMARY,
     long_description=DESCRIPTION,
-    author='worldveil',
+    author='Will Drevo',
     author_email='will.drevo@gmail.com',
+    maintainer="Saleem Ansari",
+    maintainer_email="tuxdna@gmail.com",
     url='http://github.com/tuxdna/dejavu',
-    license='Apache 2.0',
+    license='MIT License',
     include_package_data=True,
     packages=find_packages(),
-    platforms=['Any'],
+    platforms=['Unix'],
     install_requires=REQUIREMENTS,
     classifiers=[
         'Development Status :: 4 - Beta',
         'Environment :: Console',
         'Intended Audience :: Developers',
-        'License :: OSI Approved :: Apache Software License',
+        'License :: OSI Approved :: MIT License',
         'Operating System :: OS Independent',
         'Topic :: Software Development :: Libraries :: Python Modules',
     ],
+    keywords="python, audio, fingerprinting, music, numpy, landmark",
 )

From 8e67f0da053d6a2003ad3299d1b0d62057420213 Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Tue, 2 Dec 2014 15:49:25 +0530
Subject: [PATCH 09/11] Update INSTALLATION.md

---
 INSTALLATION.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/INSTALLATION.md b/INSTALLATION.md
index 33b429e..df75562 100644
--- a/INSTALLATION.md
+++ b/INSTALLATION.md
@@ -14,7 +14,7 @@ For installing `ffmpeg` on Mac OS X, I highly recommend [this post](http://junge
 
 ## Fedora 20+
 
-### Dependency installation for Mac OS X
+### Dependency installation on Fedora 20+
 
 Install the dependencies
 

From 29f59be1e7541791caf31df4de00c051a80701a6 Mon Sep 17 00:00:00 2001
From: Saleem Ansari <tuxdna@gmail.com>
Date: Wed, 3 Dec 2014 16:08:53 +0530
Subject: [PATCH 10/11] Update version numbers in requirements file

---
 requirements.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6e13078..9478f73 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
 # requirements file
 
 ### BEGIN ###
-pydub==0.9.4
+pydub>=0.9.4
 PyAudio>=0.2.7
-numpy==1.8.2
-scipy==0.12.1
-matplotlib==1.3.1
+numpy>=1.8.2
+scipy>=0.12.1
+matplotlib>=1.3.1
 ### END ###

From ece1c8b22e2500b0a6a206b3e4a3d02659ed548a Mon Sep 17 00:00:00 2001
From: Will Drevo <will.drevo@gmail.com>
Date: Mon, 15 Dec 2014 21:30:33 -0500
Subject: [PATCH 11/11] Fixes #65, fixes #60, merges #64

---
 dejavu.py             | 26 ++++++++++++++------------
 dejavu/fingerprint.py | 10 ++--------
 example.py            | 13 ++++++++++---
 3 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/dejavu.py b/dejavu.py
index a50c9da..0bb4d0c 100755
--- a/dejavu.py
+++ b/dejavu.py
@@ -1,5 +1,6 @@
 #!/usr/bin/python
 
+import os
 import sys
 import json
 import warnings
@@ -12,15 +13,15 @@ from argparse import RawTextHelpFormatter
 
 warnings.filterwarnings("ignore")
 
-DEFAULT_CONFIG_FILE = "dejavu.cnf"
+DEFAULT_CONFIG_FILE = "dejavu.cnf.SAMPLE"
 
 
-def init(config_file):
-    """ Load config from a JSON file
-    or anything outputting a python dictionary
+def init(configpath):
+    """
+    Load config from a JSON file
     """
     try:
-        with open(config_file) as f:
+        with open(configpath) as f:
             config = json.load(f)
     except IOError as err:
         print("Cannot open configuration: %s. Exiting" % (str(err)))
@@ -31,15 +32,13 @@ def init(config_file):
 
 
 if __name__ == '__main__':
-    """ If running from terminal.
-    """
     parser = argparse.ArgumentParser(
-        description="Audio Fingerprinting library",
+        description="Dejavu: Audio Fingerprinting library",
         formatter_class=RawTextHelpFormatter)
     parser.add_argument('-c', '--config', nargs='?',
                         help='Path to configuration file\n'
                              'Usages: \n'
-                             '--config /path/to/congfile\n')
+                             '--config /path/to/config-file\n')
     parser.add_argument('-f', '--fingerprint', nargs='*',
                         help='Fingerprint files in a directory\n'
                              'Usages: \n'
@@ -54,7 +53,7 @@ if __name__ == '__main__':
     args = parser.parse_args()
 
     if not args.fingerprint and not args.recognize:
-        print("No arguments")
+        parser.print_help()
         sys.exit(0)
 
     config_file = args.config
@@ -65,15 +64,18 @@ if __name__ == '__main__':
     djv = init(config_file)
     if args.fingerprint:
         # Fingerprint all files in a directory
-        if 2 == len(args.fingerprint):
+        if len(args.fingerprint) == 2:
             directory = args.fingerprint[0]
             extension = args.fingerprint[1]
             print("Fingerprinting all .%s files in the %s directory"
                   % (extension, directory))
             djv.fingerprint_directory(directory, ["." + extension], 4)
 
-        elif 1 == len(args.fingerprint):
+        elif len(args.fingerprint) == 1:
             filepath = args.fingerprint[0]
+            if os.path.isdir(filepath):
+                print("Please specify an extension if you'd like to fingerprint a directory!")
+                sys.exit(1)
             djv.fingerprint_file(filepath)
 
     elif args.recognize:
diff --git a/dejavu/fingerprint.py b/dejavu/fingerprint.py
index 95c0076..4db321b 100755
--- a/dejavu/fingerprint.py
+++ b/dejavu/fingerprint.py
@@ -136,26 +136,20 @@ def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
        sha1_hash[0:20]    time_offset
     [(e05b341a9b77a51fd26, 32), ... ]
     """
-    fingerprinted = set()  # to avoid rehashing same pairs
-
     if PEAK_SORT:
         peaks.sort(key=itemgetter(1))
 
     for i in range(len(peaks)):
         for j in range(1, fan_value):
-            if (i + j) < len(peaks) and not (i, i + j) in fingerprinted:
+            if (i + j) < len(peaks):
+
                 freq1 = peaks[i][IDX_FREQ_I]
                 freq2 = peaks[i + j][IDX_FREQ_I]
-
                 t1 = peaks[i][IDX_TIME_J]
                 t2 = peaks[i + j][IDX_TIME_J]
-
                 t_delta = t2 - t1
 
                 if t_delta >= MIN_HASH_TIME_DELTA and t_delta <= MAX_HASH_TIME_DELTA:
                     h = hashlib.sha1(
                         "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta)))
                     yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)
-
-                # ensure we don't repeat hashing
-                fingerprinted.add((i, i + j))
diff --git a/example.py b/example.py
index e2769cb..991c6a9 100755
--- a/example.py
+++ b/example.py
@@ -16,12 +16,19 @@ djv.fingerprint_directory("mp3", [".mp3"])
 # Recognize audio from a file
 from dejavu.recognize import FileRecognizer
 song = djv.recognize(FileRecognizer, "mp3/Sean-Fournier--Falling-For-You.mp3")
+print "From file we recognized: %s\n" % song
 
-# Or recognize audio from your microphone for 10 seconds
+# Or recognize audio from your microphone for `secs` seconds
 from dejavu.recognize import MicrophoneRecognizer
-song = djv.recognize(MicrophoneRecognizer, seconds=2)
+secs = 5
+song = djv.recognize(MicrophoneRecognizer, seconds=secs)
+if song is None:
+    print "Nothing recognized -- did you play the song out loud so your mic could hear it? :)"
+else:
+    print "From mic with %d seconds we recognized: %s\n" % (secs, song)
 
 # Or use a recognizer without the shortcut, in anyway you would like
 from dejavu.recognize import FileRecognizer
 recognizer = FileRecognizer(djv)
-song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3")
\ No newline at end of file
+song = recognizer.recognize_file("mp3/Josh-Woodward--I-Want-To-Destroy-Something-Beautiful.mp3")
+print "No shortcut, we recognized: %s\n" % song
\ No newline at end of file