mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 08:07:56 +01:00
Whisper_stt params for model, language, and auto_submit (#3031)
This commit is contained in:
parent
79679b3cfd
commit
acf24ebb49
15
extensions/whisper_stt/readme.md
Normal file
15
extensions/whisper_stt/readme.md
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# whisper_stt
|
||||||
|
|
||||||
|
Allows you to enter your inputs in chat mode using your microphone.
|
||||||
|
|
||||||
|
## Settings
|
||||||
|
|
||||||
|
To adjust your default settings, you can add the following to your settings.yaml file.
|
||||||
|
|
||||||
|
```
|
||||||
|
whisper_stt-whipser_language: chinese
|
||||||
|
whisper_stt-whipser_model: tiny
|
||||||
|
whisper_stt-auto_submit: False
|
||||||
|
```
|
||||||
|
|
||||||
|
See source documentation for [model names](https://github.com/openai/whisper#available-models-and-languages) and [languages](https://github.com/openai/whisper/blob/main/whisper/tokenizer.py) you can use.
|
@ -8,8 +8,15 @@ input_hijack = {
|
|||||||
'value': ["", ""]
|
'value': ["", ""]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# parameters which can be customized in settings.yaml of webui
|
||||||
|
params = {
|
||||||
|
'whipser_language': 'english',
|
||||||
|
'whipser_model': 'small.en',
|
||||||
|
'auto_submit': True
|
||||||
|
}
|
||||||
|
|
||||||
def do_stt(audio):
|
|
||||||
|
def do_stt(audio,whipser_model,whipser_language):
|
||||||
transcription = ""
|
transcription = ""
|
||||||
r = sr.Recognizer()
|
r = sr.Recognizer()
|
||||||
|
|
||||||
@ -17,7 +24,7 @@ def do_stt(audio):
|
|||||||
audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
|
audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
transcription = r.recognize_whisper(audio_data, language="english", model="base.en")
|
transcription = r.recognize_whisper(audio_data, language=whipser_language, model=whipser_model)
|
||||||
except sr.UnknownValueError:
|
except sr.UnknownValueError:
|
||||||
print("Whisper could not understand audio")
|
print("Whisper could not understand audio")
|
||||||
except sr.RequestError as e:
|
except sr.RequestError as e:
|
||||||
@ -26,11 +33,10 @@ def do_stt(audio):
|
|||||||
return transcription
|
return transcription
|
||||||
|
|
||||||
|
|
||||||
def auto_transcribe(audio, auto_submit):
|
def auto_transcribe(audio, auto_submit,whipser_model,whipser_language):
|
||||||
if audio is None:
|
if audio is None:
|
||||||
return "", ""
|
return "", ""
|
||||||
|
transcription = do_stt(audio,whipser_model,whipser_language)
|
||||||
transcription = do_stt(audio)
|
|
||||||
if auto_submit:
|
if auto_submit:
|
||||||
input_hijack.update({"state": True, "value": [transcription, transcription]})
|
input_hijack.update({"state": True, "value": [transcription, transcription]})
|
||||||
|
|
||||||
@ -38,10 +44,18 @@ def auto_transcribe(audio, auto_submit):
|
|||||||
|
|
||||||
|
|
||||||
def ui():
|
def ui():
|
||||||
|
with gr.Accordion("Whisper STT", open=True):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
audio = gr.Audio(source="microphone")
|
audio = gr.Audio(source="microphone")
|
||||||
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=True)
|
with gr.Row():
|
||||||
|
with gr.Accordion("Settings", open=False):
|
||||||
|
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
|
||||||
|
whipser_model = gr.Dropdown(label='Whisper Model', value=params['whipser_model'],choices=["tiny.en","base.en", "small.en","medium.en","tiny","base","small","medium","large"])
|
||||||
|
whipser_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'],choices=["chinese","german","spanish","russian","korean","french","japanese","portuguese","turkish","polish","catalan","dutch","arabic","swedish","italian","indonesian","hindi","finnish","vietnamese","hebrew","ukrainian","greek","malay","czech","romanian","danish","hungarian","tamil","norwegian","thai","urdu","croatian","bulgarian","lithuanian","latin","maori","malayalam","welsh","slovak","telugu","persian","latvian","bengali","serbian","azerbaijani","slovenian","kannada","estonian","macedonian","breton","basque","icelandic","armenian","nepali","mongolian","bosnian","kazakh","albanian","swahili","galician","marathi","punjabi","sinhala","khmer","shona","yoruba","somali","afrikaans","occitan","georgian","belarusian","tajik","sindhi","gujarati","amharic","yiddish","lao","uzbek","faroese","haitian creole","pashto","turkmen","nynorsk","maltese","sanskrit","luxembourgish","myanmar","tibetan","tagalog","malagasy","assamese","tatar","hawaiian","lingala","hausa","bashkir","javanese","sundanese"])
|
||||||
|
|
||||||
audio.change(
|
audio.change(
|
||||||
auto_transcribe, [audio, auto_submit], [shared.gradio['textbox'], audio]).then(
|
auto_transcribe, [audio, auto_submit,whipser_model,whipser_language], [shared.gradio['textbox'], audio]).then(
|
||||||
None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}")
|
None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}")
|
||||||
|
whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None)
|
||||||
|
whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None)
|
||||||
|
auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)
|
||||||
|
Loading…
Reference in New Issue
Block a user