Merge pull request #71 from sg3510/wav-24-bit

24-bit wav file support
2024-11-23 11:09:52 +00:00 · 2015-03-21 18:35:14 -04:00 · 2015-03-21 18:35:14 -04:00 · 1fe7d4fd45
commit 1fe7d4fd45
parent dad98961f4 2feecce34b
2 changed files with 149 additions and 10 deletions
--- a/dejavu/decoder.py
+++ b/dejavu/decoder.py
@ -2,7 +2,8 @@ import os
 import fnmatch
 import numpy as np
 from pydub import AudioSegment
-
+from pydub.utils import audioop
 import wavio
 def find_files(path, extensions):
    # Allow both with ".mp3" and without "mp3" to be used for extensions
@ -18,7 +19,8 @@ def find_files(path, extensions):
 def read(filename, limit=None):
    """
    Reads any file supported by pydub (ffmpeg) and returns the data contained
-    within.
+    within. If file reading fails due to input being a 24-bit wav file,
    wavio is used as a backup.
    Can be optionally limited to a certain amount of seconds from the start
    of the file by specifying the `limit` parameter. This is the amount of
@ -26,18 +28,34 @@ def read(filename, limit=None):
    returns: (channels, samplerate)
    """
-    audiofile = AudioSegment.from_file(filename)
+    # pydub does not support 24-bit wav files, use wavio when this occurs
    try:
        audiofile = AudioSegment.from_file(filename)
-    if limit:
+        if limit:
-        audiofile = audiofile[:limit * 1000]
+            audiofile = audiofile[:limit * 1000]
-    data = np.fromstring(audiofile._data, np.int16)
+        data = np.fromstring(audiofile._data, np.int16)
-    channels = []
+        channels = []
-    for chn in xrange(audiofile.channels):
+        for chn in xrange(audiofile.channels):
-        channels.append(data[chn::audiofile.channels])
+            channels.append(data[chn::audiofile.channels])
-    return channels, audiofile.frame_rate
+        fs = audiofile.frame_rate
    except audioop.error:
        fs, _, audiofile = wavio.readwav(filename)
        if limit:
            audiofile = audiofile[:limit * 1000]
        audiofile = audiofile.T
        audiofile = audiofile.astype(np.int16)
        channels = []
        for chn in audiofile:
            channels.append(chn)
    return channels, fs
 def path_to_songname(path):
--- a/dejavu/wavio.py
+++ b/dejavu/wavio.py
@ -0,0 +1,121 @@
 # wavio.py
 # Author: Warren Weckesser
 # License: BSD 3-Clause (http://opensource.org/licenses/BSD-3-Clause)
 # Synopsis: A Python module for reading and writing 24 bit WAV files.
 # Github: github.com/WarrenWeckesser/wavio
 import wave as _wave
 import numpy as _np
 def _wav2array(nchannels, sampwidth, data):
    """data must be the string containing the bytes from the wav file."""
    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
    if remainder > 0:
        raise ValueError('The length of data is not a multiple of '
                         'sampwidth * num_channels.')
    if sampwidth > 4:
        raise ValueError("sampwidth must not be greater than 4.")
    if sampwidth == 3:
        a = _np.empty((num_samples, nchannels, 4), dtype=_np.uint8)
        raw_bytes = _np.fromstring(data, dtype=_np.uint8)
        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
        result = a.view('<i4').reshape(a.shape[:-1])
    else:
        # 8 bit samples are stored as unsigned ints; others as signed ints.
        dt_char = 'u' if sampwidth == 1 else 'i'
        a = _np.fromstring(data, dtype='<%s%d' % (dt_char, sampwidth))
        result = a.reshape(-1, nchannels)
    return result
 def readwav(file):
    """
    Read a WAV file.
    Parameters
    ----------
    file : string or file object
        Either the name of a file or an open file pointer.
    Return Values
    -------------
    rate : float
        The sampling frequency (i.e. frame rate)
    sampwidth : float
        The sample width, in bytes.  E.g. for a 24 bit WAV file,
        sampwidth is 3.
    data : numpy array
        The array containing the data.  The shape of the array is
        (num_samples, num_channels).  num_channels is the number of
        audio channels (1 for mono, 2 for stereo).
    Notes
    -----
    This function uses the `wave` module of the Python standard libary
    to read the WAV file, so it has the same limitations as that library.
    In particular, the function does not read compressed WAV files.
    """
    wav = _wave.open(file)
    rate = wav.getframerate()
    nchannels = wav.getnchannels()
    sampwidth = wav.getsampwidth()
    nframes = wav.getnframes()
    data = wav.readframes(nframes)
    wav.close()
    array = _wav2array(nchannels, sampwidth, data)
    return rate, sampwidth, array
 def writewav24(filename, rate, data):
    """
    Create a 24 bit wav file.
    Parameters
    ----------
    filename : string
        Name of the file to create.
    rate : float
        The sampling frequency (i.e. frame rate) of the data.
    data : array-like collection of integer or floating point values
        data must be "array-like", either 1- or 2-dimensional.  If it
        is 2-d, the rows are the frames (i.e. samples) and the columns
        are the channels.
    Notes
    -----
    The data is assumed to be signed, and the values are assumed to be
    within the range of a 24 bit integer.  Floating point values are
    converted to integers.  The data is not rescaled or normalized before
    writing it to the file.
    Example
    -------
    Create a 3 second 440 Hz sine wave.
    >>> rate = 22050  # samples per second
    >>> T = 3         # sample duration (seconds)
    >>> f = 440.0     # sound frequency (Hz)
    >>> t = np.linspace(0, T, T*rate, endpoint=False)
    >>> x = (2**23 - 1) * np.sin(2 * np.pi * f * t)
    >>> writewav24("sine24.wav", rate, x)
    """
    a32 = _np.asarray(data, dtype=_np.int32)
    if a32.ndim == 1:
        # Convert to a 2D array with a single column.
        a32.shape = a32.shape + (1,)
    # By shifting first 0 bits, then 8, then 16, the resulting output
    # is 24 bit little-endian.
    a8 = (a32.reshape(a32.shape + (1,)) >> _np.array([0, 8, 16])) & 255
    wavdata = a8.astype(_np.uint8).tostring()
    w = _wave.open(filename, 'wb')
    w.setnchannels(a32.shape[1])
    w.setsampwidth(3)
    w.setframerate(rate)
    w.writeframes(wavdata)
    w.close()