diff --git a/transcribe_demo.py b/transcribe_demo.py index 6ea633c..2f2ae45 100644 --- a/transcribe_demo.py +++ b/transcribe_demo.py @@ -12,6 +12,7 @@ from queue import Queue from time import sleep from sys import platform +import textwrap def main(): parser = argparse.ArgumentParser() @@ -89,6 +90,8 @@ def main(): # Cue the user that we're ready to go. print("Model loaded.\n") + audio_data = b'' + while True: try: now = datetime.utcnow() @@ -101,11 +104,15 @@ def main(): phrase_complete = True # This is the last time we received new audio data from the queue. phrase_time = now - + + # for d in data_queue: + # if d > 0.5: + # print("Got something: ", d) + # Combine audio data from queue - audio_data = b''.join(data_queue.queue) + audio_data += b''.join(data_queue.queue) data_queue.queue.clear() - + # Convert in-ram buffer to something the model can use directly without needing a temp file. # Convert data from 16 bit wide integers to floating point with a width of 32 bits. # Clamp the audio stream frequency to a PCM wavelength compatible default of 32768hz max. @@ -115,22 +122,44 @@ def main(): result = audio_model.transcribe(audio_np, fp16=torch.cuda.is_available()) text = result['text'].strip() - # If we detected a pause between recordings, add a new item to our transcription. - # Otherwise edit the existing one. + # # If we detected a pause between recordings, add a new item to our transcription. + # # Otherwise edit the existing one. + # if phrase_complete: + # transcription.append(text) + # else: + # transcription[-1] += text + print(text) + + # Update rolling transcription file. + f = open("transcription.txt", "w+") + output_text = transcription[-4:] + output_text.append(text) + f.write(" ".join(output_text)) + f.close() + if phrase_complete: + + # Append to full transcription. transcription.append(text) - else: - transcription[-1] = text + + # text += "\n" + # f = open("transcription.txt", "w+") + # f.write("\n".join(textwrap.wrap(text))) + # f.close() + + print("* Phrase complete.") + audio_data = b'' + # Clear the console to reprint the updated transcription. - os.system('cls' if os.name=='nt' else 'clear') + # os.system('cls' if os.name=='nt' else 'clear') for line in transcription: print(line) # Flush stdout. print('', end='', flush=True) else: # Infinite loops are bad for processors, must sleep. - sleep(0.25) + sleep(0.01) except KeyboardInterrupt: break