Elevenlabs Extension Improvement and migration to official API (#1830)

This commit is contained in:
Steve Randall 2023-05-06 14:56:31 +01:00 committed by GitHub
parent 56f6b7052a
commit b03a2ac512
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 107 additions and 52 deletions

View File

@ -1,3 +1 @@
elevenlabslib elevenlabs==0.2.*
soundfile
sounddevice

View File

@ -2,51 +2,55 @@ import re
from pathlib import Path from pathlib import Path
import gradio as gr import gradio as gr
from elevenlabslib import ElevenLabsUser import elevenlabs
from elevenlabslib.helpers import save_bytes_to_path
import modules.shared as shared from modules import chat, shared
from modules.html_generator import chat_html_wrapper
params = { params = {
'activate': True, 'activate': True,
'api_key': '12345', 'api_key': None,
'selected_voice': 'None', 'selected_voice': 'None',
'autoplay': False,
'show_text': True,
} }
initial_voice = ['None'] voices = None
wav_idx = 0 wav_idx = 0
user = ElevenLabsUser(params['api_key'])
user_info = None
# Check if the API is valid and refresh the UI accordingly.
def check_valid_api():
    """Rebuild the ElevenLabs user from the stored API key and report the status.

    Rebinds the module-level ``user`` and ``user_info`` on every call.

    Returns:
        A Gradio update whose value is 'Connected' only when TTS is
        activated and subscription data came back non-None; otherwise
        'Disconnected'.
    """
    global user, user_info, params

    user = ElevenLabsUser(params['api_key'])
    user_info = user._get_subscription_data()
    print('checking api')

    # A deactivated extension is reported as disconnected without logging
    # anything about the key itself.
    if not params['activate']:
        return gr.update(value='Disconnected')

    if user_info is None:
        print('Incorrect API Key')
        status = 'Disconnected'
    else:
        print('Got an API Key!')
        status = 'Connected'
    return gr.update(value=status)
# Once the API is verified, get the available voices and update the dropdown list
def refresh_voices(): def refresh_voices():
global params
your_voices = elevenlabs.voices(api_key=params['api_key'])
voice_names = [voice.name for voice in your_voices]
return voice_names
global user, user_info
your_voices = [None] def refresh_voices_dd():
if user_info is not None: all_voices = refresh_voices()
for voice in user.get_available_voices(): return gr.Dropdown.update(value=all_voices[0], choices=all_voices)
your_voices.append(voice.initialName)
return gr.Dropdown.update(choices=your_voices)
else: def remove_tts_from_history(name1, name2, mode):
return for i, entry in enumerate(shared.history['internal']):
shared.history['visible'][i] = [shared.history['visible'][i][0], entry[1]]
return chat_html_wrapper(shared.history['visible'], name1, name2, mode)
def toggle_text_in_history(name1, name2, mode):
    """Show or hide the message text under each audio reply in the visible history.

    Only visible replies that start with '<audio' are rewritten. When
    ``params['show_text']`` is truthy the matching internal reply is appended
    after the audio element; otherwise the reply is reduced to the audio
    element alone.

    Args:
        name1: Passed through unchanged to ``chat_html_wrapper``.
        name2: Passed through unchanged to ``chat_html_wrapper``.
        mode: Passed through unchanged to ``chat_html_wrapper``.

    Returns:
        The result of ``chat_html_wrapper`` for the updated visible history.
    """
    visible = shared.history['visible']
    for idx, row in enumerate(visible):
        shown_reply = row[1]
        if not shown_reply.startswith('<audio'):
            continue

        # Keep the text before the first closing tag, then re-add the tag.
        audio_tag = f"{shown_reply.split('</audio>')[0]}</audio>"
        if params['show_text']:
            raw_reply = shared.history['internal'][idx][1]
            new_reply = f"{audio_tag}\n\n{raw_reply}"
        else:
            new_reply = audio_tag
        visible[idx] = [visible[idx][0], new_reply]

    return chat_html_wrapper(visible, name1, name2, mode)
def remove_surrounded_chars(string): def remove_surrounded_chars(string):
@ -65,7 +69,14 @@ def input_modifier(string):
This function is applied to your text inputs before This function is applied to your text inputs before
they are fed into the model. they are fed into the model.
""" """
# Remove autoplay from the last reply
if shared.is_chat() and len(shared.history['internal']) > 0:
shared.history['visible'][-1] = [
shared.history['visible'][-1][0],
shared.history['visible'][-1][1].replace('controls autoplay>', 'controls>')
]
if params['activate']:
shared.processing_message = "*Is recording a voice message...*"
return string return string
@ -74,13 +85,12 @@ def output_modifier(string):
This function is applied to the model outputs. This function is applied to the model outputs.
""" """
global params, wav_idx, user, user_info global params, wav_idx
if not params['activate']: if not params['activate']:
return string return string
elif user_info is None:
return string
original_string = string
string = remove_surrounded_chars(string) string = remove_surrounded_chars(string)
string = string.replace('"', '') string = string.replace('"', '')
string = string.replace('', '') string = string.replace('', '')
@ -90,31 +100,78 @@ def output_modifier(string):
if string == '': if string == '':
string = 'empty reply, try regenerating' string = 'empty reply, try regenerating'
output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.wav'.format(wav_idx)) output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3'.format(wav_idx))
voice = user.get_voices_by_name(params['selected_voice'])[0] print(f'Outputing audio to {str(output_file)}')
audio_data = voice.generate_audio_bytes(string) try:
save_bytes_to_path(Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.wav'), audio_data) audio = elevenlabs.generate(text=string, voice=params['selected_voice'], model="eleven_monolingual_v1")
elevenlabs.save(audio, str(output_file))
string = f'<audio src="file/{output_file.as_posix()}" controls></audio>' autoplay = 'autoplay' if params['autoplay'] else ''
wav_idx += 1 string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
wav_idx += 1
except elevenlabs.api.error.UnauthenticatedRateLimitError:
string = "🤖 ElevenLabs Unauthenticated Rate Limit Reached - Please create an API key to continue\n\n"
except elevenlabs.api.error.RateLimitError:
string = "🤖 ElevenLabs API Tier Limit Reached\n\n"
except elevenlabs.api.error.APIError as err:
string = f"🤖 ElevenLabs Error: {err}\n\n"
if params['show_text']:
string += f'\n\n{original_string}'
shared.processing_message = "*Is typing...*"
return string return string
def ui(): def ui():
global voices
if not voices:
voices = refresh_voices()
params['selected_voice'] = voices[0]
# Gradio elements # Gradio elements
with gr.Row(): with gr.Row():
activate = gr.Checkbox(value=params['activate'], label='Activate TTS') activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
connection_status = gr.Textbox(value='Disconnected', label='Connection Status') autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')
show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')
voice = gr.Dropdown(value=params['selected_voice'], choices=initial_voice, label='TTS Voice') with gr.Row():
voice = gr.Dropdown(value=params['selected_voice'], choices=voices, label='TTS Voice')
refresh = gr.Button(value='Refresh')
with gr.Row(): with gr.Row():
api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key') api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key')
connect = gr.Button(value='Connect') with gr.Row():
convert = gr.Button('Permanently replace audios with the message texts')
convert_cancel = gr.Button('Cancel', visible=False)
convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
# Convert history with confirmation
convert_arr = [convert_confirm, convert, convert_cancel]
convert.click(
lambda: [gr.update(visible=True), gr.update(visible=False),
gr.update(visible=True)], None, convert_arr
)
convert_confirm.click(
lambda: [gr.update(visible=False), gr.update(visible=True),
gr.update(visible=False)], None, convert_arr
)
convert_confirm.click(
remove_tts_from_history, [shared.gradio[k] for k in ['name1', 'name2', 'mode']], shared.gradio['display']
)
convert_confirm.click(lambda: chat.save_history(timestamp=False), [], [], show_progress=False)
convert_cancel.click(
lambda: [gr.update(visible=False), gr.update(visible=True),
gr.update(visible=False)], None, convert_arr
)
# Event functions to update the parameters in the backend # Event functions to update the parameters in the backend
activate.change(lambda x: params.update({'activate': x}), activate, None) activate.change(lambda x: params.update({'activate': x}), activate, None)
voice.change(lambda x: params.update({'selected_voice': x}), voice, None) voice.change(lambda x: params.update({'selected_voice': x}), voice, None)
api_key.change(lambda x: params.update({'api_key': x}), api_key, None) api_key.change(lambda x: params.update({'api_key': x}), api_key, None)
connect.click(check_valid_api, [], connection_status) # connect.click(check_valid_api, [], connection_status)
connect.click(refresh_voices, [], voice) refresh.click(refresh_voices_dd, [], voice)
# Toggle message text in history
show_text.change(lambda x: params.update({"show_text": x}), show_text, None)
show_text.change(
toggle_text_in_history, [shared.gradio[k] for k in ['name1', 'name2', 'mode']], shared.gradio['display']
)
show_text.change(lambda: chat.save_history(timestamp=False), [], [], show_progress=False)
# Event functions to update the parameters in the backend
autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)