Auto-submit the whisper extension transcription

This commit is contained in:
oobabooga 2023-04-07 15:57:29 -03:00
parent 1dc464dcb0
commit 5543a5089d
3 changed files with 19 additions and 24 deletions

View File

@@ -36,3 +36,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
.wrap.svelte-6roggh.svelte-6roggh { .wrap.svelte-6roggh.svelte-6roggh {
max-height: 92.5%; max-height: 92.5%;
} }
/* This is for the microphone button in the whisper extension */
.sm.svelte-1ipelgc {
width: 100%;
}

View File

@@ -1,5 +1,6 @@
import gradio as gr import gradio as gr
import speech_recognition as sr import speech_recognition as sr
from modules import shared
input_hijack = { input_hijack = {
'state': False, 'state': False,
@@ -7,7 +8,7 @@ input_hijack = {
} }
def do_stt(audio, text_state=""): def do_stt(audio):
transcription = "" transcription = ""
r = sr.Recognizer() r = sr.Recognizer()
@@ -21,34 +22,23 @@ def do_stt(audio, text_state=""):
except sr.RequestError as e: except sr.RequestError as e:
print("Could not request results from Whisper", e) print("Could not request results from Whisper", e)
input_hijack.update({"state": True, "value": [transcription, transcription]}) return transcription
text_state += transcription + " "
return text_state, text_state
def update_hijack(val): def auto_transcribe(audio, auto_submit):
input_hijack.update({"state": True, "value": [val, val]})
return val
def auto_transcribe(audio, audio_auto, text_state=""):
if audio is None: if audio is None:
return "", "" return "", ""
if audio_auto:
return do_stt(audio, text_state) transcription = do_stt(audio)
return "", "" if auto_submit:
input_hijack.update({"state": True, "value": [transcription, transcription]})
return transcription, None
def ui(): def ui():
tr_state = gr.State(value="")
output_transcription = gr.Textbox(label="STT-Input",
placeholder="Speech Preview. Click \"Generate\" to send",
interactive=True)
output_transcription.change(fn=update_hijack, inputs=[output_transcription], outputs=[tr_state])
audio_auto = gr.Checkbox(label="Auto-Transcribe", value=True)
with gr.Row(): with gr.Row():
audio = gr.Audio(source="microphone") audio = gr.Audio(source="microphone")
audio.change(fn=auto_transcribe, inputs=[audio, audio_auto, tr_state], outputs=[output_transcription, tr_state]) auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=True)
transcribe_button = gr.Button(value="Transcribe") audio.change(fn=auto_transcribe, inputs=[audio, auto_submit], outputs=[shared.gradio['textbox'], audio])
transcribe_button.click(do_stt, inputs=[audio, tr_state], outputs=[output_transcription, tr_state]) audio.change(None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}")

View File

@@ -330,7 +330,7 @@ def create_interface():
shared.gradio['display'] = gr.HTML(value=chat_html_wrapper(shared.history['visible'], shared.settings['name1'], shared.settings['name2'], 'cai-chat')) shared.gradio['display'] = gr.HTML(value=chat_html_wrapper(shared.history['visible'], shared.settings['name1'], shared.settings['name2'], 'cai-chat'))
shared.gradio['textbox'] = gr.Textbox(label='Input') shared.gradio['textbox'] = gr.Textbox(label='Input')
with gr.Row(): with gr.Row():
shared.gradio['Generate'] = gr.Button('Generate') shared.gradio['Generate'] = gr.Button('Generate', elem_id='Generate')
shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop")
with gr.Row(): with gr.Row():
shared.gradio['Impersonate'] = gr.Button('Impersonate') shared.gradio['Impersonate'] = gr.Button('Impersonate')