Merge pull request #71 from sg3510/wav-24-bit

24-bit wav file support
2024-11-23 11:09:52 +00:00 · 2015-03-21 18:35:14 -04:00 · 2015-03-21 18:35:14 -04:00 · 1fe7d4fd45
commit 1fe7d4fd45
parent dad98961f4 2feecce34b
2 changed files with 149 additions and 10 deletions
--- a/dejavu/decoder.py
+++ b/dejavu/decoder.py
@ -2,7 +2,8 @@ import os
 import fnmatch
 import numpy as np
 from pydub import AudioSegment
-
+from pydub.utils import audioop
+import wavio

 def find_files(path, extensions):
    # Allow both with ".mp3" and without "mp3" to be used for extensions
@ -18,7 +19,8 @@ def find_files(path, extensions):
 def read(filename, limit=None):
    """
    Reads any file supported by pydub (ffmpeg) and returns the data contained
-    within.
+    within. If file reading fails due to input being a 24-bit wav file,
+    wavio is used as a backup.

    Can be optionally limited to a certain amount of seconds from the start
    of the file by specifying the `limit` parameter. This is the amount of
@ -26,6 +28,8 @@ def read(filename, limit=None):

    returns: (channels, samplerate)
    """
+    # pydub does not support 24-bit wav files, use wavio when this occurs
+    try:
        audiofile = AudioSegment.from_file(filename)

        if limit:
@ -37,7 +41,21 @@ def read(filename, limit=None):
        for chn in xrange(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

-    return channels, audiofile.frame_rate
+        fs = audiofile.frame_rate
+    except audioop.error:
+        fs, _, audiofile = wavio.readwav(filename)
+
+        if limit:
+            audiofile = audiofile[:limit * 1000]
+
+        audiofile = audiofile.T
+        audiofile = audiofile.astype(np.int16)
+
+        channels = []
+        for chn in audiofile:
+            channels.append(chn)
+
+    return channels, fs


 def path_to_songname(path):
--- a/dejavu/wavio.py
+++ b/dejavu/wavio.py
@ -0,0 +1,121 @@
+# wavio.py
+# Author: Warren Weckesser
+# License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
+# Synopsis: A Python module for reading and writing 24 bit WAV files.
+# Github: github.com/WarrenWeckesser/wavio
+
+import wave as _wave
+import numpy as _np
+
+
+def _wav2array(nchannels, sampwidth, data):
+    """
+    Convert raw WAV frame bytes to a numpy array.
+
+    Parameters
+    ----------
+    nchannels : int
+        Number of interleaved audio channels in `data`.
+    sampwidth : int
+        Width of one sample in bytes; must not be greater than 4.
+    data : string
+        The string containing the bytes from the wav file.
+
+    Return Values
+    -------------
+    result : numpy array
+        Array of shape (num_samples, nchannels).  8 bit data is returned
+        as unsigned ints, 16 and 32 bit data as signed ints of the same
+        width, and 24 bit data is sign-extended to 32 bit signed ints.
+
+    Raises
+    ------
+    ValueError
+        If len(data) is not a multiple of sampwidth * nchannels, or if
+        sampwidth is greater than 4.
+    """
+    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
+    if remainder > 0:
+        raise ValueError('The length of data is not a multiple of '
+                         'sampwidth * num_channels.')
+    if sampwidth > 4:
+        raise ValueError("sampwidth must not be greater than 4.")
+
+    if sampwidth == 3:
+        # Unpack each 3-byte little-endian sample into a 4-byte slot and
+        # fill the fourth byte with the sign (0x00 or 0xFF), so the slot
+        # can be reinterpreted as a little-endian int32.
+        a = _np.empty((num_samples, nchannels, 4), dtype=_np.uint8)
+        # frombuffer replaces the deprecated binary mode of fromstring.
+        raw_bytes = _np.frombuffer(data, dtype=_np.uint8)
+        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
+        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
+        result = a.view('<i4').reshape(a.shape[:-1])
+    else:
+        # 8 bit samples are stored as unsigned ints; others as signed ints.
+        dt_char = 'u' if sampwidth == 1 else 'i'
+        a = _np.frombuffer(data, dtype='<%s%d' % (dt_char, sampwidth))
+        # frombuffer yields a read-only view of `data`; copy so the caller
+        # still receives a writable array, as fromstring used to provide.
+        result = a.reshape(-1, nchannels).copy()
+    return result
+
+
+def readwav(file):
+    """
+    Read a WAV file.
+
+    Parameters
+    ----------
+    file : string or file object
+        Either the name of a file or an open file pointer.
+
+    Return Values
+    -------------
+    rate : int
+        The sampling frequency (i.e. frame rate)
+    sampwidth : int
+        The sample width, in bytes.  E.g. for a 24 bit WAV file,
+        sampwidth is 3.
+    data : numpy array
+        The array containing the data.  The shape of the array is
+        (num_samples, num_channels).  num_channels is the number of
+        audio channels (1 for mono, 2 for stereo).
+
+    Notes
+    -----
+    This function uses the `wave` module of the Python standard library
+    to read the WAV file, so it has the same limitations as that library.
+    In particular, the function does not read compressed WAV files.
+
+    """
+    wav = _wave.open(file)
+    try:
+        rate = wav.getframerate()
+        nchannels = wav.getnchannels()
+        sampwidth = wav.getsampwidth()
+        nframes = wav.getnframes()
+        data = wav.readframes(nframes)
+    finally:
+        # Release the wave reader even if reading the header or the
+        # frames fails part-way through.
+        wav.close()
+    array = _wav2array(nchannels, sampwidth, data)
+    return rate, sampwidth, array
+
+
+def writewav24(filename, rate, data):
+    """
+    Create a 24 bit wav file.
+
+    Parameters
+    ----------
+    filename : string
+        Name of the file to create.
+    rate : float
+        The sampling frequency (i.e. frame rate) of the data.
+    data : array-like collection of integer or floating point values
+        data must be "array-like", either 1- or 2-dimensional.  If it
+        is 2-d, the rows are the frames (i.e. samples) and the columns
+        are the channels.
+
+    Notes
+    -----
+    The data is assumed to be signed, and the values are assumed to be
+    within the range of a 24 bit integer.  Floating point values are
+    converted to integers.  The data is not rescaled or normalized before
+    writing it to the file.  The input `data` itself is left unmodified.
+
+    Example
+    -------
+    Create a 3 second 440 Hz sine wave.
+
+    >>> rate = 22050  # samples per second
+    >>> T = 3         # sample duration (seconds)
+    >>> f = 440.0     # sound frequency (Hz)
+    >>> t = np.linspace(0, T, T*rate, endpoint=False)
+    >>> x = (2**23 - 1) * np.sin(2 * np.pi * f * t)
+    >>> writewav24("sine24.wav", rate, x)
+
+    """
+    a32 = _np.asarray(data, dtype=_np.int32)
+    if a32.ndim == 1:
+        # Convert to a 2D array with a single column.  reshape returns a
+        # new view, so a caller-owned int32 array keeps its own shape
+        # (assigning to a32.shape would have altered it in place).
+        a32 = a32.reshape(-1, 1)
+    # By shifting first 0 bits, then 8, then 16, the resulting output
+    # is 24 bit little-endian.
+    a8 = (a32.reshape(a32.shape + (1,)) >> _np.array([0, 8, 16])) & 255
+    # tobytes is the supported spelling of the removed ndarray.tostring.
+    wavdata = a8.astype(_np.uint8).tobytes()
+
+    w = _wave.open(filename, 'wb')
+    try:
+        w.setnchannels(a32.shape[1])
+        w.setsampwidth(3)
+        w.setframerate(rate)
+        w.writeframes(wavdata)
+    finally:
+        # Always release the wave writer, even if a parameter is rejected
+        # or the write fails.
+        w.close()