Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2024-11-22 08:07:56 +01:00)

Commit 48aa52849b: use Gradio microphone input instead
Parent: 3b4145966d
@@ -1,39 +0,0 @@
-# Installation instructions
-
-- On all platforms, run `pip install -r requirements.txt` in this folder
-- You need **PortAudio** to run the speech recognition. Below are guides for all platforms
-
-## Windows
-
-- You don't need to do anything, `pyaudio` already comes with PortAudio included on Windows.
-
-## Mac
-
-```commandline
-brew install portaudio
-brew link --overwrite portaudio
-pip install pyaudio
-```
-
-## Linux
-
-- You have to use your distro's package manager to install PortAudio.
-
-### Ubuntu / Debian / Mint
-
-```commandline
-sudo apt install portaudio19-dev python3-pyaudio
-```
-
-### Arch / Manjaro
-
-```commandline
-sudo pacman -S portaudio
-```
-
-### Fedora
-
-```commandline
-sudo dnf -y install portaudio
-```
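Review note: the README deleted above existed only to get PortAudio working for server-side microphone capture, which this commit makes unnecessary. A minimal sketch (mine, not part of the commit) of the sanity check those instructions were building toward, assuming `pyaudio` is installed:

```python
# Hypothetical check for the old PyAudio/PortAudio path this commit removes;
# raises if PortAudio cannot find a usable input device.
import pyaudio

pa = pyaudio.PyAudio()
print(pa.get_default_input_device_info()["name"])
pa.terminate()
```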
@@ -1,5 +1,4 @@
 git+https://github.com/Uberi/speech_recognition.git@010382b
-PyAudio
 openai-whisper
 soundfile
 ffmpeg
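`PyAudio` drops out of the requirements because recording moves from the server (PortAudio) into the browser: Gradio's microphone component hands the callback a ready-made tuple instead. A rough sketch of that payload, assuming Gradio 3.x with the default `type="numpy"`:

```python
# What gr.Audio(source="microphone") passes to its callback: None until
# something is recorded, then a (sample_rate, frames) tuple.
def on_audio(audio):
    if audio is None:
        return
    sample_rate, frames = audio  # int Hz, numpy array of integer samples
    print(sample_rate, frames.dtype, frames.shape)
```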
@@ -7,22 +7,24 @@ input_hijack = {
 }
 
 
-def do_stt():
+def do_stt(audio, text_state=""):
     transcription = ""
     r = sr.Recognizer()
-    with sr.Microphone() as source:
-        r.adjust_for_ambient_noise(source, 0.2)
-        audio = r.listen(source)
+
+    # Convert to AudioData
+    audio_data = sr.AudioData(sample_rate=audio[0], frame_data=audio[1], sample_width=4)
 
     try:
-        transcription = r.recognize_whisper(audio, language="english", model="base.en")
+        transcription = r.recognize_whisper(audio_data, language="english", model="base.en")
     except sr.UnknownValueError:
         print("Whisper could not understand audio")
     except sr.RequestError as e:
         print("Could not request results from Whisper", e)
 
     input_hijack.update({"state": True, "value": [transcription, transcription]})
-    return transcription
+
+    text_state += transcription + " "
+    return text_state, text_state
 
 
 def update_hijack(val):
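The rewritten `do_stt` no longer opens a microphone itself; it wraps the frames Gradio already recorded in an `sr.AudioData` and hands that to Whisper. A standalone sketch of the same conversion, runnable outside the webui (the `.tobytes()` call is my addition for safety, and `sample_width=4` mirrors the commit's assumption of 32-bit integer samples):

```python
import speech_recognition as sr

def transcribe(audio):
    # audio is the (sample_rate, frames) tuple from gr.Audio
    sample_rate, frames = audio
    r = sr.Recognizer()
    audio_data = sr.AudioData(frame_data=frames.tobytes(),
                              sample_rate=sample_rate,
                              sample_width=4)
    try:
        return r.recognize_whisper(audio_data, language="english", model="base.en")
    except sr.UnknownValueError:
        return ""  # Whisper could not understand the audio
```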
@@ -31,7 +33,12 @@ def update_hijack(val):
 
 
 def ui():
-    speech_button = gr.Button(value="🎙️")
-    output_transcription = gr.Textbox(label="STT-Input", placeholder="Speech Preview. Click \"Generate\" to send", interactive=True)
-    output_transcription.change(fn=update_hijack, inputs=[output_transcription])
-    speech_button.click(do_stt, outputs=[output_transcription])
+    tr_state = gr.State(value="")
+    output_transcription = gr.Textbox(label="STT-Input",
+                                      placeholder="Speech Preview. Click \"Generate\" to send",
+                                      interactive=True)
+    output_transcription.change(fn=update_hijack, inputs=[output_transcription], outputs=[tr_state])
+    with gr.Row():
+        audio = gr.Audio(source="microphone")
+        transcribe_button = gr.Button(value="Transcribe")
+    transcribe_button.click(do_stt, inputs=[audio, tr_state], outputs=[output_transcription, tr_state])
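Putting the two hunks together: the new `ui()` pairs the visible Textbox with a `gr.State` accumulator, so each transcription is appended rather than overwriting the preview. A toy end-to-end version with hypothetical names, assuming Gradio 3.x (`gr.Audio(source=...)` predates Gradio 4):

```python
import gradio as gr

def do_stt_demo(audio, text_state=""):
    # Stand-in for do_stt: append a marker instead of a real transcription.
    if audio is not None:
        text_state += f"[{len(audio[1])} samples] "
    return text_state, text_state  # update both the Textbox and the State

with gr.Blocks() as demo:
    tr_state = gr.State(value="")
    preview = gr.Textbox(label="STT-Input", interactive=True)
    with gr.Row():
        mic = gr.Audio(source="microphone")
        btn = gr.Button(value="Transcribe")
    btn.click(do_stt_demo, inputs=[mic, tr_state], outputs=[preview, tr_state])

demo.launch()
```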