Kiri tweaks.

2025-03-06 20:27:44 -08:00 · 2025-03-06 20:27:44 -08:00 · 38eba07644
commit 38eba07644
parent f1bdcbdb5e
1 changed file with 38 additions and 9 deletions
--- a/transcribe_demo.py
+++ b/transcribe_demo.py
@@ -12,6 +12,7 @@ from queue import Queue
 from time import sleep
 from sys import platform
 import textwrap
 def main():
    parser = argparse.ArgumentParser()
@@ -89,6 +90,8 @@ def main():
    # Cue the user that we're ready to go.
    print("Model loaded.\n")
    audio_data = b''
    while True:
        try:
            now = datetime.utcnow()
@@ -101,11 +104,15 @@ def main():
                    phrase_complete = True
                # This is the last time we received new audio data from the queue.
                phrase_time = now
-                
+
                # for d in data_queue:
                #     if d > 0.5:
                #         print("Got something: ", d)
                # Combine audio data from queue
-                audio_data = b''.join(data_queue.queue)
+                audio_data += b''.join(data_queue.queue)
                data_queue.queue.clear()
-                
+
                # Convert in-ram buffer to something the model can use directly without needing a temp file.
                # Convert data from 16 bit wide integers to floating point with a width of 32 bits.
                # Clamp the audio stream frequency to a PCM wavelength compatible default of 32768hz max.
@@ -115,22 +122,44 @@ def main():
                result = audio_model.transcribe(audio_np, fp16=torch.cuda.is_available())
                text = result['text'].strip()
-                # If we detected a pause between recordings, add a new item to our transcription.
+                # # If we detected a pause between recordings, add a new item to our transcription.
-                # Otherwise edit the existing one.
+                # # Otherwise edit the existing one.
                # if phrase_complete:
                #     transcription.append(text)
                # else:
                #     transcription[-1] += text
                print(text)
                # Update rolling transcription file.
                f = open("transcription.txt", "w+")
                output_text = transcription[-4:]
                output_text.append(text)
                f.write(" ".join(output_text))
                f.close()
                if phrase_complete:
                    # Append to full transcription.
                    transcription.append(text)
-                else:
+
-                    transcription[-1] = text
+                    # text += "\n"
                    # f = open("transcription.txt", "w+")
                    # f.write("\n".join(textwrap.wrap(text)))
                    # f.close()
                    print("* Phrase complete.")
                    audio_data = b''
                # Clear the console to reprint the updated transcription.
-                os.system('cls' if os.name=='nt' else 'clear')
+                # os.system('cls' if os.name=='nt' else 'clear')
                for line in transcription:
                    print(line)
                # Flush stdout.
                print('', end='', flush=True)
            else:
                # Infinite loops are bad for processors, must sleep.
-                sleep(0.25)
+                sleep(0.01)
        except KeyboardInterrupt:
            break