Kiri tweaks.

This commit is contained in:
Kiri 2025-03-06 20:27:44 -08:00
parent f1bdcbdb5e
commit 38eba07644

View File

@ -12,6 +12,7 @@ from queue import Queue
from time import sleep from time import sleep
from sys import platform from sys import platform
import textwrap
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
@ -89,6 +90,8 @@ def main():
# Cue the user that we're ready to go. # Cue the user that we're ready to go.
print("Model loaded.\n") print("Model loaded.\n")
audio_data = b''
while True: while True:
try: try:
now = datetime.utcnow() now = datetime.utcnow()
@ -102,8 +105,12 @@ def main():
# This is the last time we received new audio data from the queue. # This is the last time we received new audio data from the queue.
phrase_time = now phrase_time = now
# for d in data_queue:
# if d > 0.5:
# print("Got something: ", d)
# Combine audio data from queue # Combine audio data from queue
audio_data = b''.join(data_queue.queue) audio_data += b''.join(data_queue.queue)
data_queue.queue.clear() data_queue.queue.clear()
# Convert in-ram buffer to something the model can use directly without needing a temp file. # Convert in-ram buffer to something the model can use directly without needing a temp file.
@ -115,22 +122,44 @@ def main():
result = audio_model.transcribe(audio_np, fp16=torch.cuda.is_available()) result = audio_model.transcribe(audio_np, fp16=torch.cuda.is_available())
text = result['text'].strip() text = result['text'].strip()
# If we detected a pause between recordings, add a new item to our transcription. # # If we detected a pause between recordings, add a new item to our transcription.
# Otherwise edit the existing one. # # Otherwise edit the existing one.
# if phrase_complete:
# transcription.append(text)
# else:
# transcription[-1] += text
print(text)
# Update rolling transcription file.
f = open("transcription.txt", "w+")
output_text = transcription[-4:]
output_text.append(text)
f.write(" ".join(output_text))
f.close()
if phrase_complete: if phrase_complete:
# Append to full transcription.
transcription.append(text) transcription.append(text)
else:
transcription[-1] = text # text += "\n"
# f = open("transcription.txt", "w+")
# f.write("\n".join(textwrap.wrap(text)))
# f.close()
print("* Phrase complete.")
audio_data = b''
# Clear the console to reprint the updated transcription. # Clear the console to reprint the updated transcription.
os.system('cls' if os.name=='nt' else 'clear') # os.system('cls' if os.name=='nt' else 'clear')
for line in transcription: for line in transcription:
print(line) print(line)
# Flush stdout. # Flush stdout.
print('', end='', flush=True) print('', end='', flush=True)
else: else:
# Infinite loops are bad for processors, must sleep. # Infinite loops are bad for processors, must sleep.
sleep(0.25) sleep(0.01)
except KeyboardInterrupt: except KeyboardInterrupt:
break break