whisper-live/audiosource.py

177 lines
5.2 KiB
Python

#!/usr/bin/python3
import socket
import select
import time
from queue import Queue
import json
import threading
import speech_recognition
import wave
from pyogg.opus_decoder import OpusDecoder
class AudioSource:
def __init__(self):
# Thread safe Queue for passing data from the threaded recording
# callback.
self.data_queue = Queue()
def is_done(self):
return True
# -----------------------------------------------------------------------------
# Microphone
# How real time the recording is in seconds.
record_timeout = 2
class MicrophoneAudioSource(AudioSource):
def __init__(self):
super().__init__()
self._recorder = speech_recognition.Recognizer()
self._recorder.energy_threshold = 1200
# Definitely do this, dynamic energy compensation lowers the energy
# threshold dramatically to a point where the SpeechRecognizer
# never stops recording.
self._recorder.dynamic_energy_threshold = False
self._source = speech_recognition.Microphone(sample_rate=16000)
with self._source:
self._recorder.adjust_for_ambient_noise(self._source)
def record_callback(_, audio:speech_recognition.AudioData) -> None:
"""
Threaded callback function to receive audio data when recordings finish.
audio: An AudioData containing the recorded bytes.
"""
# Grab the raw bytes and push it into the thread safe queue.
data = audio.get_raw_data()
self.data_queue.put(bytearray(data))
# Create a background thread that will pass us raw audio bytes.
# We could do this manually but SpeechRecognizer provides a nice helper.
self._stopper = self._recorder.listen_in_background(
self._source, record_callback,
phrase_time_limit=record_timeout)
def stop(self):
assert(self._stopper)
self._stopper()
self._recorder = None
self._stopper = None
self._source = None
def is_done(self):
return self._recorder == None
# -----------------------------------------------------------------------------
# Opus stream
# For debugging
# wave_out = wave.open("wave2.wav", "wb")
# wave_out.setnchannels(1)
# wave_out.setframerate(16000)
# wave_out.setsampwidth(2)
class OpusStreamAudioSource(AudioSource):
def __init__(self, sock):
super().__init__()
self._socket = sock
self._opus_decoder = OpusDecoder()
self._opus_decoder.set_channels(1)
self._opus_decoder.set_sampling_frequency(16000)
# Fetch user info.
user_info_tmp = self._read_packet(self._socket)
self._user_info = json.loads(user_info_tmp.decode("utf-8"))
print("User connection...")
print(json.dumps(self._user_info, indent=4))
self._is_done = False
# Start input thread.
self._input_thread = threading.Thread(
target=self._input_thread_func, daemon=True)
self._input_thread.start()
def _read_packet(self, sock):
try:
input_buffer = b''
#print("Reading packet size...")
while len(input_buffer) < 4:
input_buffer = input_buffer + sock.recv(1)
if not input_buffer:
raise Exception("Failed to read size of packet.")
packet_size = int.from_bytes(input_buffer, "little")
#print("Packet size: ", packet_size)
input_buffer = b''
while len(input_buffer) < packet_size:
input_buffer = input_buffer + sock.recv(1)
if not input_buffer:
raise Exception("Failed to read packet.")
return input_buffer
except Exception as e: # FIXME: Use socket-specific exception type.
return None
def _input_thread_func(self):
print("input thread start")
try:
while not self._is_done:
next_packet = self._read_packet(self._socket)
if next_packet:
# If we don't use bytearray here to copy, we run into a weird
# exception about the memory not being writeable.
decoded_data = self._opus_decoder.decode(bytearray(next_packet))
# For debugging.
#wave_out.writeframes(decoded_data)
# We need to copy decoded_data here or we end up with
# recycled buffers in our queue, which leads to broken
# audio.
self.data_queue.put(bytearray(decoded_data))
else:
break
except Exception as e:
# Probably disconnected. We don't care. Just clean up.
# FIXME: Limit exception to socket errors.
pass
print("input thread done")
self._is_done = True
def stop(self):
self._is_done = True
# We won't join() the input thread because we don't want to sit around
# and wait for a packet. It'll die on its own, so whatever.
def is_done(self):
return self._is_done