From 8b200c39f78fd92959c76b300a24686cebad3e29 Mon Sep 17 00:00:00 2001
From: sg3510 <sg3510@ic.ac.uk>
Date: Thu, 26 Feb 2015 14:16:55 +0000
Subject: [PATCH 1/4] adding wavio support

---
 dejavu/decoder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dejavu/decoder.py b/dejavu/decoder.py
index e2e2d33..f063d0f 100755
--- a/dejavu/decoder.py
+++ b/dejavu/decoder.py
@@ -2,7 +2,7 @@ import os
 import fnmatch
 import numpy as np
 from pydub import AudioSegment
-
+import wavio
 
 def find_files(path, extensions):
     # Allow both with ".mp3" and without "mp3" to be used for extensions

From 2c961cc43f3b330fb1eb3880e3a07b358498d6e4 Mon Sep 17 00:00:00 2001
From: sg3510 <sg3510@ic.ac.uk>
Date: Thu, 26 Feb 2015 14:24:44 +0000
Subject: [PATCH 2/4] wavio add

---
 dejavu/decoder.py |  30 +++++++++---
 dejavu/wavio.py   | 121 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+), 8 deletions(-)
 create mode 100644 dejavu/wavio.py

diff --git a/dejavu/decoder.py b/dejavu/decoder.py
index f063d0f..69eab52 100755
--- a/dejavu/decoder.py
+++ b/dejavu/decoder.py
@@ -2,6 +2,7 @@ import os
 import fnmatch
 import numpy as np
 from pydub import AudioSegment
+from pydub.utils import audioop
 import wavio
 
 def find_files(path, extensions):
@@ -26,18 +27,31 @@ def read(filename, limit=None):
 
     returns: (channels, samplerate)
     """
-    audiofile = AudioSegment.from_file(filename)
+    # pydub does not support 24-bit wav files, use wavio when this occurs
+    try:
+        audiofile = AudioSegment.from_file(filename)
 
-    if limit:
-        audiofile = audiofile[:limit * 1000]
+        if limit:
+            audiofile = audiofile[:limit * 1000]
 
-    data = np.fromstring(audiofile._data, np.int16)
+        data = np.fromstring(audiofile._data, np.int16)
 
-    channels = []
-    for chn in xrange(audiofile.channels):
-        channels.append(data[chn::audiofile.channels])
+        channels = []
+        for chn in xrange(audiofile.channels):
+            channels.append(data[chn::audiofile.channels])
 
-    return channels, audiofile.frame_rate
+        fs = audiofile.frame_rate
+    except audioop.error:
+        fs, _, audiofile = wavio.readwav(filename)
+
+        audiofile = audiofile.T
+        audiofile = audiofile.astype(np.int16)
+        
+        channels = []
+        for chn in audiofile:
+            channels.append(chn)
+
+    return channels, fs
 
 
 def path_to_songname(path):
diff --git a/dejavu/wavio.py b/dejavu/wavio.py
new file mode 100644
index 0000000..e8d1fc3
--- /dev/null
+++ b/dejavu/wavio.py
@@ -0,0 +1,121 @@
+# wavio.py
+# Author: Warren Weckesser
+# License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
+# Synopsis: A Python module for reading and writing 24 bit WAV files.
+# Github: github.com/WarrenWeckesser/wavio
+
+import wave as _wave
+import numpy as _np
+
+
+def _wav2array(nchannels, sampwidth, data):
+    """Decode raw wav frame bytes into a (num_samples, nchannels) array."""
+    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
+    if remainder > 0:
+        raise ValueError('The length of data is not a multiple of '
+                         'sampwidth * num_channels.')
+    if sampwidth > 4:
+        raise ValueError("sampwidth must not be greater than 4.")
+
+    if sampwidth == 3:  # no 3-byte dtype: pad to 4 bytes, sign-extended
+        a = _np.empty((num_samples, nchannels, 4), dtype=_np.uint8)
+        raw_bytes = _np.fromstring(data, dtype=_np.uint8)
+        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
+        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
+        result = a.view('<i4').reshape(a.shape[:-1])
+    else:
+        # 8 bit samples are stored as unsigned ints; others as signed ints.
+        dt_char = 'u' if sampwidth == 1 else 'i'
+        a = _np.fromstring(data, dtype='<%s%d' % (dt_char, sampwidth))
+        result = a.reshape(-1, nchannels)
+    return result
+
+
+def readwav(file):
+    """
+    Read a WAV file.
+
+    Parameters
+    ----------
+    file : string or file object
+        Either the name of a file or an open file pointer.
+
+    Return Values
+    -------------
+    rate : int
+        The sampling frequency (i.e. frame rate)
+    sampwidth : int
+        The sample width, in bytes.  E.g. for a 24 bit WAV file,
+        sampwidth is 3.
+    data : numpy array
+        The array containing the data.  The shape of the array is
+        (num_samples, num_channels).  num_channels is the number of
+        audio channels (1 for mono, 2 for stereo).
+
+    Notes
+    -----
+    This function uses the `wave` module of the Python standard library
+    to read the WAV file, so it has the same limitations as that library.
+    In particular, the function does not read compressed WAV files.
+
+    """
+    wav = _wave.open(file)
+    try:
+        rate = wav.getframerate()
+        nchannels = wav.getnchannels()
+        sampwidth = wav.getsampwidth()
+        data = wav.readframes(wav.getnframes())
+    finally:  # close the file even when reading the header or frames fails
+        wav.close()
+    return rate, sampwidth, _wav2array(nchannels, sampwidth, data)
+
+
+def writewav24(filename, rate, data):
+    """
+    Create a 24 bit wav file.
+
+    Parameters
+    ----------
+    filename : string
+        Name of the file to create.
+    rate : float
+        The sampling frequency (i.e. frame rate) of the data.
+    data : array-like collection of integer or floating point values
+        data must be "array-like", either 1- or 2-dimensional.  If it
+        is 2-d, the rows are the frames (i.e. samples) and the columns
+        are the channels.
+
+    Notes
+    -----
+    The data is assumed to be signed, and the values are assumed to be
+    within the range of a 24 bit integer.  Floating point values are
+    converted to integers.  The data is not rescaled or normalized before
+    writing it to the file.
+
+    Example
+    -------
+    Create a 3 second 440 Hz sine wave.
+
+    >>> rate = 22050  # samples per second
+    >>> T = 3         # sample duration (seconds)
+    >>> f = 440.0     # sound frequency (Hz)
+    >>> t = np.linspace(0, T, T*rate, endpoint=False)
+    >>> x = (2**23 - 1) * np.sin(2 * np.pi * f * t)
+    >>> writewav24("sine24.wav", rate, x)
+
+    """
+    a32 = _np.asarray(data, dtype=_np.int32)
+    if a32.ndim == 1:
+        # Use a 2D single-column view; the caller's array keeps its shape.
+        a32 = a32.reshape(-1, 1)
+    # By shifting first 0 bits, then 8, then 16, the resulting output
+    # is 24 bit little-endian.
+    a8 = (a32.reshape(a32.shape + (1,)) >> _np.array([0, 8, 16])) & 255
+    wavdata = a8.astype(_np.uint8).tostring()
+
+    w = _wave.open(filename, 'wb')
+    w.setnchannels(a32.shape[1])
+    w.setsampwidth(3)
+    w.setframerate(rate)
+    w.writeframes(wavdata)
+    w.close()

From 3a9e3a4da735d22f2dad877073d6c9f55b68c5e1 Mon Sep 17 00:00:00 2001
From: sg3510 <sg3510@ic.ac.uk>
Date: Thu, 26 Feb 2015 14:33:41 +0000
Subject: [PATCH 3/4] adding limit support

---
 dejavu/decoder.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dejavu/decoder.py b/dejavu/decoder.py
index 69eab52..960fbd8 100755
--- a/dejavu/decoder.py
+++ b/dejavu/decoder.py
@@ -44,9 +44,12 @@ def read(filename, limit=None):
     except audioop.error:
         fs, _, audiofile = wavio.readwav(filename)
 
+        if limit:
+            audiofile = audiofile[:int(limit * fs)]  # seconds -> frames
+
         audiofile = audiofile.T
         audiofile = audiofile.astype(np.int16)
-        
+
         channels = []
         for chn in audiofile:
             channels.append(chn)

From 2feecce34b797d932f318abb7e0f33f07a3417ec Mon Sep 17 00:00:00 2001
From: sg3510 <sg3510@ic.ac.uk>
Date: Thu, 26 Feb 2015 16:10:09 +0000
Subject: [PATCH 4/4] updated docstring

---
 dejavu/decoder.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dejavu/decoder.py b/dejavu/decoder.py
index 960fbd8..830b8f7 100755
--- a/dejavu/decoder.py
+++ b/dejavu/decoder.py
@@ -19,7 +19,8 @@ def find_files(path, extensions):
 def read(filename, limit=None):
     """
     Reads any file supported by pydub (ffmpeg) and returns the data contained
-    within.
+    within. If file reading fails due to input being a 24-bit wav file,
+    wavio is used as a backup.
 
     Can be optionally limited to a certain amount of seconds from the start
     of the file by specifying the `limit` parameter. This is the amount of