Merge pull request #1 from JohnCiubuc/master
Better Linux support and fixed FP16 Warning
This commit is contained in:
commit
a0b5bdbb0a
@ -5,11 +5,13 @@ import io
|
||||
import os
|
||||
import speech_recognition as sr
|
||||
import whisper
|
||||
import torch
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from queue import Queue
|
||||
from tempfile import NamedTemporaryFile
|
||||
from time import sleep
|
||||
from sys import platform
|
||||
|
||||
|
||||
def main():
|
||||
@ -25,8 +27,42 @@ def main():
|
||||
parser.add_argument("--phrase_timeout", default=3,
|
||||
help="How much empty space between recordings before we "
|
||||
"consider it a new line in the transcription.", type=float)
|
||||
if 'linux' in platform:
|
||||
parser.add_argument("--default_microphone", default='pulse',
|
||||
help="Default microphone name for SpeechRecognition. "
|
||||
"Run this with 'list' to view available Microphones.", type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
# The last time a recording was retrieved from the queue.
|
||||
phrase_time = None
|
||||
# Current raw audio bytes.
|
||||
last_sample = bytes()
|
||||
# Thread safe Queue for passing data from the threaded recording callback.
|
||||
data_queue = Queue()
|
||||
# We use SpeechRecognizer to record our audio because it has a nice feature where it can detect when speech ends.
|
||||
recorder = sr.Recognizer()
|
||||
recorder.energy_threshold = args.energy_threshold
|
||||
# Definitely do this: dynamic energy compensation lowers the energy threshold dramatically, to a point where the SpeechRecognizer never stops recording.
|
||||
recorder.dynamic_energy_threshold = False
|
||||
|
||||
# Important for linux users.
|
||||
# Prevents a permanent application hang or crash caused by using the wrong microphone.
|
||||
if 'linux' in platform:
|
||||
mic_name = args.default_microphone
|
||||
if not mic_name or mic_name == 'list':
|
||||
print("Available microphone devices are: ")
|
||||
for index, name in enumerate(sr.Microphone.list_microphone_names()):
|
||||
print(f"Microphone with name \"{name}\" found")
|
||||
return
|
||||
else:
|
||||
for index, name in enumerate(sr.Microphone.list_microphone_names()):
|
||||
if mic_name in name:
|
||||
source = sr.Microphone(sample_rate=16000, device_index=index)
|
||||
break
|
||||
else:
|
||||
source = sr.Microphone(sample_rate=16000)
|
||||
|
||||
# Load / Download model
|
||||
model = args.model
|
||||
if args.model != "large" and not args.non_english:
|
||||
model = model + ".en"
|
||||
@ -38,20 +74,6 @@ def main():
|
||||
temp_file = NamedTemporaryFile().name
|
||||
transcription = ['']
|
||||
|
||||
# The last time a recording was retrieved from the queue.
|
||||
phrase_time = None
|
||||
# Current raw audio bytes.
|
||||
last_sample = bytes()
|
||||
# Thread safe Queue for passing data from the threaded recording callback.
|
||||
data_queue = Queue()
|
||||
|
||||
# We use SpeechRecognizer to record our audio because it has a nice feature where it can detect when speech ends.
|
||||
recorder = sr.Recognizer()
|
||||
recorder.energy_threshold = args.energy_threshold
|
||||
# Definitely do this: dynamic energy compensation lowers the energy threshold dramatically, to a point where the SpeechRecognizer never stops recording.
|
||||
recorder.dynamic_energy_threshold = False
|
||||
|
||||
source = sr.Microphone(sample_rate=16000)
|
||||
with source:
|
||||
recorder.adjust_for_ambient_noise(source)
|
||||
|
||||
@ -99,7 +121,7 @@ def main():
|
||||
f.write(wav_data.read())
|
||||
|
||||
# Read the transcription.
|
||||
result = audio_model.transcribe(temp_file)
|
||||
result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available())
|
||||
text = result['text'].strip()
|
||||
|
||||
# If we detected a pause between recordings, add a new item to our transcription.
|
||||
|
Loading…
Reference in New Issue
Block a user