Mirror of https://github.com/correl/dejavu.git (synced 2024-11-23 19:19:53 +00:00)
Finished the changes to the recognize module
- Recognizers now use a class structure
- Generic code for matching is in BaseRecognizer
- Two recognizers are available:
  - Wave file recognizer
  - Recording recognizer
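A minimal usage sketch for the new recognizers follows. This is illustrative only: the `Dejavu` constructor, its config keys, and the import path are assumptions, not part of this commit.

    from dejavu import Dejavu
    from recognize import WaveFileRecognizer, MicrophoneRecognizer

    # Assumed: Dejavu is constructed from a database configuration dict; keys are illustrative.
    config = {"database": {"host": "127.0.0.1", "user": "root", "passwd": "", "db": "dejavu"}}
    djv = Dejavu(config)

    # Wave file recognizer: match a pre-recorded stereo file
    match = WaveFileRecognizer(djv, filename="example.wav").recognize()

    # Recording recognizer: record from the microphone for ten seconds, then match
    match = MicrophoneRecognizer(djv, seconds=10).recognize()

Both recognizers funnel their per-channel data into `BaseRecognizer._recognize`, which gathers matches with `find_matches` and resolves them with `align_matches`.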
This commit is contained in:
parent 371742a314
commit d25c564d4f
1 changed file with 72 additions and 49 deletions
@@ -10,6 +10,10 @@ import sys
 import time
 import array
 
+CHUNK = 8192 # 44100 is a multiple of 1225
+FORMAT = pyaudio.paInt16
+CHANNELS = 2
+RATE = 44100
 
 class BaseRecognizer(object):
 
@@ -17,19 +21,25 @@ class BaseRecognizer(object):
         self.dejavu = dejavu
         self.Fs = dejavu.fingerprint.DEFAULT_FS
 
-    def recognize(self, *data):
+    def _recognize(self, *data):
         matches = []
         for d in data:
             matches.extend(self.dejavu.find_matches(data, Fs=self.Fs))
         return self.dejavu.align_matches(matches)
 
+    def recognize(self):
+        pass # base class does nothing
+
 
 class WaveFileRecognizer(BaseRecognizer):
 
-    def __init__(self, dejavu):
+    def __init__(self, dejavu, filename=None):
         super(BaseRecognizer, self).__init__(dejavu)
+        self.filename = filename
 
-    def recognize_file(self, filepath):
+    def recognize_file(self, filename):
         Fs, frames = wavfile.read(filename)
         self.Fs = Fs
 
@@ -41,7 +51,7 @@ class WaveFileRecognizer(BaseRecognizer):
             channels.append(frames[:, channel])
 
         t = time.time()
-        match = self.recognize(*channels)
+        match = self._recognize(*channels)
         t = time.time() - t
 
         if match:
@@ -49,54 +59,67 @@ class WaveFileRecognizer(BaseRecognizer):
 
         return match
 
+    def recognize(self):
+        return self.recognize_file(self.filename)
+
 
 class MicrophoneRecognizer(BaseRecognizer):
 
+    def __init__(self, dejavu, seconds=None):
+        super(BaseRecognizer, self).__init__(dejavu)
+        self.audio = pyaudio.PyAudio()
+        self.stream = None
+        self.data = []
+        self.channels = CHANNELS
+        self.chunk_size = CHUNK
+        self.rate = RATE
+        self.recorded = False
+
+    def start_recording(self, channels=CHANNELS, rate=RATE, chunk=CHUNK):
+        self.chunk_size = chunk
+        self.channels = channels
+        self.recorded = False
+        self.rate = rate
+
+        if self.stream:
+            self.stream.stop_stream()
+            self.stream.close()
+
+        self.stream = self.audio.open(format=FORMAT,
+                                      channels=channels,
+                                      rate=rate,
+                                      input=True,
+                                      frames_per_buffer=chunk)
+
+        self.data = [[] for i in range(channels)]
+
+    def process_recording(self):
+        data = self.stream.read(self.chunk_size)
+        nums = np.fromstring(data, np.int16)
+        for c in range(self.channels):
+            self.data[c].extend(nums[c::c+1])
+
+    def stop_recording(self):
+        self.stream.stop_stream()
+        self.stream.close()
+        self.stream = None
+        self.recorded = True
+
+    def recognize_recording(self):
+        if not self.recorded:
+            raise NoRecordingError("Recording was not complete/begun")
+        return self._recognize(*data)
+
+    def get_recorded_time(self):
+        return len(self.data[0]) / self.rate
+
+    def recognize(self):
+        self.start_recording()
+        for i in range(0, int(self.rate / self.chunk * self.seconds)):
+            self.process_recording()
+        self.stop_recording()
+        return self.recognize_recording()
+
+
+class NoRecordingError(Exception):
     pass
-
-
-class Recognizer(object):
-
-    CHUNK = 8192 # 44100 is a multiple of 1225
-    FORMAT = pyaudio.paInt16
-    CHANNELS = 2
-    RATE = 44100
-
-    def __init__(self, fingerprinter, config):
-
-        self.fingerprinter = fingerprinter
-        self.config = config
-        self.audio = pyaudio.PyAudio()
-
-    def listen(self, seconds=10, verbose=False):
-
-        # open stream
-        stream = self.audio.open(format=Recognizer.FORMAT,
-                                 channels=Recognizer.CHANNELS,
-                                 rate=Recognizer.RATE,
-                                 input=True,
-                                 frames_per_buffer=Recognizer.CHUNK)
-
-        # record
-        if verbose: print("* recording")
-        left, right = [], []
-        for i in range(0, int(Recognizer.RATE / Recognizer.CHUNK * seconds)):
-            data = stream.read(Recognizer.CHUNK)
-            nums = np.fromstring(data, np.int16)
-            left.extend(nums[1::2])
-            right.extend(nums[0::2])
-        if verbose: print("* done recording")
-
-        # close and stop the stream
-        stream.stop_stream()
-        stream.close()
-
-        # match both channels
-        starttime = time.time()
-        matches = []
-        matches.extend(self.fingerprinter.match(left))
-        matches.extend(self.fingerprinter.match(right))
-
-        # align and return
-        return self.fingerprinter.align_matches(matches, starttime, record_seconds=seconds, verbose=verbose)