diff --git a/extensions/elevenlabs_tts/script.py b/extensions/elevenlabs_tts/script.py
index 12eba1c9..f74e1047 100644
--- a/extensions/elevenlabs_tts/script.py
+++ b/extensions/elevenlabs_tts/script.py
@@ -1,5 +1,4 @@
import re
-import time
from pathlib import Path
import elevenlabs
@@ -94,7 +93,7 @@ def history_modifier(history):
return history
-def output_modifier(string, state):
+def output_modifier(string):
global params, wav_idx
if not params['activate']:
@@ -109,7 +108,7 @@ def output_modifier(string, state):
if string == '':
string = 'empty reply, try regenerating'
- output_file = Path(f'extensions/elevenlabs_tts/outputs/{state["character_menu"]}_{int(time.time())}.mp3'.format(wav_idx))
+ output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3'.format(wav_idx))
print(f'Outputting audio to {str(output_file)}')
try:
audio = elevenlabs.generate(text=string, voice=params['selected_voice'], model=params['model'])
@@ -161,7 +160,7 @@ def ui():
api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key')
with gr.Row():
- tts_model = gr.Dropdown(value=params['model'], choices=LANG_MODELS, label='Language model')
+ model = gr.Dropdown(value=params['model'], choices=LANG_MODELS, label='Language model')
with gr.Row():
convert = gr.Button('Permanently replace audios with the message texts')
@@ -191,7 +190,7 @@ def ui():
activate.change(lambda x: params.update({'activate': x}), activate, None)
voice.change(lambda x: params.update({'selected_voice': x}), voice, None)
api_key.change(update_api_key, api_key, None)
- tts_model.change(lambda x: params.update({'model': x}), tts_model, None)
+ model.change(lambda x: params.update({'model': x}), model, None)
# connect.click(check_valid_api, [], connection_status)
refresh.click(refresh_voices_dd, [], voice)
# Event functions to update the parameters in the backend
diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py
index 47099cc2..3ecd5bd9 100644
--- a/extensions/silero_tts/script.py
+++ b/extensions/silero_tts/script.py
@@ -49,12 +49,12 @@ def load_model():
model_path = torch_cache_path + "/snakers4_silero-models_master/src/silero/model/" + params['model_id'] + ".pt"
if Path(model_path).is_file():
print(f'\nUsing Silero TTS cached checkpoint found at {torch_cache_path}')
- tts_model, example_text = torch.hub.load(repo_or_dir=torch_cache_path + '/snakers4_silero-models_master/', model='silero_tts', language=params['language'], speaker=params['model_id'], source='local', path=model_path, force_reload=True)
+ model, example_text = torch.hub.load(repo_or_dir=torch_cache_path + '/snakers4_silero-models_master/', model='silero_tts', language=params['language'], speaker=params['model_id'], source='local', path=model_path, force_reload=True)
else:
print(f'\nSilero TTS cache not found at {torch_cache_path}. Attempting to download...')
- tts_model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_tts', language=params['language'], speaker=params['model_id'])
- tts_model.to(params['device'])
- return tts_model
+ model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models', model='silero_tts', language=params['language'], speaker=params['model_id'])
+ model.to(params['device'])
+ return model
def remove_tts_from_history(history):
@@ -105,10 +105,10 @@ def history_modifier(history):
def output_modifier(string, state):
- global tts_model, current_params, streaming_state
+ global model, current_params, streaming_state
for i in params:
if params[i] != current_params[i]:
- tts_model = load_model()
+ model = load_model()
current_params = params.copy()
break
@@ -124,7 +124,7 @@ def output_modifier(string, state):
output_file = Path(f'extensions/silero_tts/outputs/{state["character_menu"]}_{int(time.time())}.wav')
prosody = ''.format(params['voice_speed'], params['voice_pitch'])
silero_input = f'{prosody}{xmlesc(string)}'
- tts_model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
+ model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
autoplay = 'autoplay' if params['autoplay'] else ''
string = f''
@@ -136,8 +136,8 @@ def output_modifier(string, state):
def setup():
- global tts_model
- tts_model = load_model()
+ global model
+ model = load_model()
def ui():
diff --git a/modules/shared.py b/modules/shared.py
index f0a426a0..d805d30f 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -13,7 +13,6 @@ is_seq2seq = False
model_name = "None"
lora_names = []
model_dirty_from_training = False
-tts_model = None
# Chat variables
stop_everything = False