diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py index 287a1fdd..dcb4dfc9 100644 --- a/extensions/whisper_stt/script.py +++ b/extensions/whisper_stt/script.py @@ -1,6 +1,6 @@ import gradio as gr import speech_recognition as sr -import modules.shared as shared + input_hijack = { 'state': False, @@ -16,25 +16,21 @@ def do_stt(): transcription = "" r = sr.Recognizer() with sr.Microphone() as source: - print("Say something!") r.adjust_for_ambient_noise(source) audio = r.listen(source) - # recognize speech using whisper try: transcription = r.recognize_whisper(audio, language="english", model="tiny.en") - print("Whisper thinks you said " + transcription) except sr.UnknownValueError: print("Whisper could not understand audio") except sr.RequestError as e: - print("Could not request results from Whisper") + print("Could not request results from Whisper", e) - # input_modifier(transcription) input_hijack.update({"state": True, "value": [transcription, transcription]}) return transcription def ui(): - speech_button = gr.Button(value="STT") - output_transcription = gr.Textbox(label="Speech Preview") + speech_button = gr.Button(value="🎙️") + output_transcription = gr.Textbox(label="STT-Preview", placeholder="Speech Preview. Click \"Generate\" to send") speech_button.click(do_stt, outputs=[output_transcription])