mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 16:17:57 +01:00
45 lines
1.3 KiB
Python
45 lines
1.3 KiB
Python
import gradio as gr
|
|
import speech_recognition as sr
|
|
from modules import shared
|
|
|
|
# Module-level state written by auto_transcribe() when auto-submit is on.
#   state -- flag, set to True once a transcription has been stored
#   value -- two-element list; both slots receive the transcribed text
# NOTE(review): presumably polled by the host web UI to override the next
# chat input -- the consumer is not visible in this file, confirm there.
input_hijack = dict(
    state=False,
    value=["", ""],
)
|
|
|
|
|
|
def do_stt(audio):
    """Run Whisper speech recognition on a recorded clip and return the text.

    Args:
        audio: Indexable pair where ``audio[0]`` is used as the sample rate
            and ``audio[1]`` as the raw frame data.

    Returns:
        The transcription produced by ``recognize_whisper``, or an empty
        string when recognition raises ``UnknownValueError`` or
        ``RequestError`` (a message is printed in either case).
    """
    recognizer = sr.Recognizer()

    # Wrap the recording in the AudioData container the recognizer expects.
    # NOTE(review): sample_width=4 assumes 4-byte samples -- confirm this
    # matches what the audio component actually delivers.
    sample_rate = audio[0]
    frames = audio[1]
    clip = sr.AudioData(sample_rate=sample_rate, frame_data=frames, sample_width=4)

    try:
        return recognizer.recognize_whisper(clip, language="english", model="base.en")
    except sr.UnknownValueError:
        print("Whisper could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Whisper", e)

    # Recognition failed: fall back to an empty transcription.
    return ""
|
|
|
|
|
|
def auto_transcribe(audio, auto_submit):
    """Transcribe a new recording and optionally queue it for submission.

    Args:
        audio: The recorded clip, or ``None`` when there is nothing to
            transcribe.
        auto_submit: When truthy, the transcription is also stored in
            ``input_hijack`` with its ``state`` flag raised.

    Returns:
        A 2-tuple. ``("", "")`` when ``audio`` is ``None``; otherwise
        ``(transcription, None)``.
    """
    if audio is not None:
        text = do_stt(audio)
        if auto_submit:
            # Both slots of "value" carry the same transcribed text.
            input_hijack.update(state=True, value=[text, text])
        return text, None

    # No recording available.
    return "", ""
|
|
|
|
|
|
def ui():
    """Build the extension's widgets and register their event handlers.

    Creates a microphone audio input and an auto-submit checkbox in one
    row, then attaches two ``change`` listeners to the audio component:
    one runs ``auto_transcribe`` and writes its results to the shared
    textbox and back to the audio component; the other runs a JavaScript
    snippet that clicks the element with id ``Generate`` when the checkbox
    is ticked.
    """
    with gr.Row():
        mic = gr.Audio(source="microphone")
        submit_box = gr.Checkbox(label='Submit the transcribed audio automatically', value=True)

    # Server-side listener: transcribe the recording into the chat textbox.
    mic.change(
        fn=auto_transcribe,
        inputs=[mic, submit_box],
        outputs=[shared.gradio['textbox'], mic],
    )

    # Client-side listener: no Python callback, only the JS below.
    click_generate_js = "(check) => {if (check) { document.getElementById('Generate').click() }}"
    mic.change(None, submit_box, None, _js=click_generate_js)
|