Default microphone option and selection for linux users

This commit is contained in:
John Ciubuc 2023-01-14 22:31:04 -06:00
parent f180571916
commit 0625e35b87

View File

@ -11,11 +11,12 @@ from datetime import datetime, timedelta
from queue import Queue from queue import Queue
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from time import sleep from time import sleep
from sys import platform
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--model", default="medium", help="Model to use", parser.add_argument("--model", default="tiny", help="Model to use",
choices=["tiny", "base", "small", "medium", "large"]) choices=["tiny", "base", "small", "medium", "large"])
parser.add_argument("--non_english", action='store_true', parser.add_argument("--non_english", action='store_true',
help="Don't use the english model.") help="Don't use the english model.")
@ -25,9 +26,43 @@ def main():
help="How real time the recording is in seconds.", type=float) help="How real time the recording is in seconds.", type=float)
parser.add_argument("--phrase_timeout", default=3, parser.add_argument("--phrase_timeout", default=3,
help="How much empty space between recordings before we " help="How much empty space between recordings before we "
"consider it a new line in the transcription.", type=float) "consider it a new line in the transcription.", type=float)
if 'linux' in platform:
parser.add_argument("--default_microphone", default='pulse',
help="Default microphone name for SpeechRecognition. "
"Run this with 'list' to view available Microphones.", type=str)
args = parser.parse_args() args = parser.parse_args()
# The last time a recording was retrieved from the queue.
phrase_time = None
# Current raw audio bytes.
last_sample = bytes()
# Thread safe Queue for passing data from the threaded recording callback.
data_queue = Queue()
# We use SpeechRecognizer to record our audio because it has a nice feature where it can detect when speech ends.
recorder = sr.Recognizer()
recorder.energy_threshold = args.energy_threshold
# Definitely do this, dynamic energy compensation lowers the energy threshold dramatically to a point where the SpeechRecognizer never stops recording.
recorder.dynamic_energy_threshold = False
# Important for Linux users.
# Prevents a permanent application hang and crash caused by using the wrong microphone.
if 'linux' in platform:
mic_name = args.default_microphone
if not mic_name or mic_name == 'list':
print("Available microphone devices are: ")
for index, name in enumerate(sr.Microphone.list_microphone_names()):
print(f"Microphone with name \"{name}\" found")
return
else:
for index, name in enumerate(sr.Microphone.list_microphone_names()):
if mic_name in name:
source = sr.Microphone(sample_rate=16000, device_index=index)
break
else:
source = sr.Microphone(sample_rate=16000)
# Load / Download model
model = args.model model = args.model
if args.model != "large" and not args.non_english: if args.model != "large" and not args.non_english:
model = model + ".en" model = model + ".en"
@ -38,21 +73,7 @@ def main():
temp_file = NamedTemporaryFile().name temp_file = NamedTemporaryFile().name
transcription = [''] transcription = ['']
# The last time a recording was retrieved from the queue.
phrase_time = None
# Current raw audio bytes.
last_sample = bytes()
# Thread safe Queue for passing data from the threaded recording callback.
data_queue = Queue()
# We use SpeechRecognizer to record our audio because it has a nice feature where it can detect when speech ends.
recorder = sr.Recognizer()
recorder.energy_threshold = args.energy_threshold
# Definitely do this, dynamic energy compensation lowers the energy threshold dramatically to a point where the SpeechRecognizer never stops recording.
recorder.dynamic_energy_threshold = False
source = sr.Microphone(sample_rate=16000)
with source: with source:
recorder.adjust_for_ambient_noise(source) recorder.adjust_for_ambient_noise(source)