Kiri tweaks.

2025-03-06 20:27:44 -08:00 · 2025-03-06 20:27:44 -08:00 · 38eba07644
commit 38eba07644
parent f1bdcbdb5e
1 changed files with 38 additions and 9 deletions
--- a/transcribe_demo.py
+++ b/transcribe_demo.py
@@ -12,6 +12,7 @@ from queue import Queue
 from time import sleep
 from sys import platform

+import textwrap

 def main():
    parser = argparse.ArgumentParser()
@@ -89,6 +90,8 @@ def main():
    # Cue the user that we're ready to go.
    print("Model loaded.\n")

+    audio_data = b''
+
    while True:
        try:
            now = datetime.utcnow()
@@ -102,8 +105,12 @@ def main():
                # This is the last time we received new audio data from the queue.
                phrase_time = now

+                # for d in data_queue:
+                #     if d > 0.5:
+                #         print("Got something: ", d)
+
                # Combine audio data from queue
-                audio_data = b''.join(data_queue.queue)
+                audio_data += b''.join(data_queue.queue)
                data_queue.queue.clear()

                # Convert in-ram buffer to something the model can use directly without needing a temp file.
@@ -115,22 +122,44 @@ def main():
                result = audio_model.transcribe(audio_np, fp16=torch.cuda.is_available())
                text = result['text'].strip()

-                # If we detected a pause between recordings, add a new item to our transcription.
-                # Otherwise edit the existing one.
+                # # If we detected a pause between recordings, add a new item to our transcription.
+                # # Otherwise edit the existing one.
+                # if phrase_complete:
+                #     transcription.append(text)
+                # else:
+                #     transcription[-1] += text
+                print(text)
+
+                # Update rolling transcription file.
+                f = open("transcription.txt", "w+")
+                output_text = transcription[-4:]
+                output_text.append(text)
+                f.write(" ".join(output_text))
+                f.close()
+
                if phrase_complete:
+
+                    # Append to full transcription.
                    transcription.append(text)
-                else:
-                    transcription[-1] = text
+
+                    # text += "\n"
+                    # f = open("transcription.txt", "w+")
+                    # f.write("\n".join(textwrap.wrap(text)))
+                    # f.close()
+
+                    print("* Phrase complete.")
+                    audio_data = b''
+

                # Clear the console to reprint the updated transcription.
-                os.system('cls' if os.name=='nt' else 'clear')
+                # os.system('cls' if os.name=='nt' else 'clear')
                for line in transcription:
                    print(line)
                # Flush stdout.
                print('', end='', flush=True)
            else:
                # Infinite loops are bad for processors, must sleep.
-                sleep(0.25)
+                sleep(0.01)
        except KeyboardInterrupt:
            break