Minor style changes to silero_tts

This commit is contained in:
oobabooga 2023-03-11 11:17:13 -03:00
parent 33df4bd91f
commit 8f8da6707d

View File

@@ -14,11 +14,12 @@ params = {
'model_id': 'v3_en', 'model_id': 'v3_en',
'sample_rate': 48000, 'sample_rate': 48000,
'device': 'cpu', 'device': 'cpu',
'show_text': True, 'show_text': False,
'autoplay': True, 'autoplay': True,
'voice_pitch': 'medium', 'voice_pitch': 'medium',
'voice_speed': 'medium', 'voice_speed': 'medium',
} }
current_params = params.copy() current_params = params.copy()
voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115'] voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 
'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high'] voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high']
@@ -33,6 +34,7 @@ table = str.maketrans({
"'": "&apos;", "'": "&apos;",
'"': "&quot;", '"': "&quot;",
}) })
def xmlesc(txt): def xmlesc(txt):
return txt.translate(table) return txt.translate(table)
@@ -57,7 +59,8 @@ def input_modifier(string):
This function is applied to your text inputs before This function is applied to your text inputs before
they are fed into the model. they are fed into the model.
""" """
#remove autoplay from previous
# Remove autoplay from previous
if len(shared.history['internal'])>0: if len(shared.history['internal'])>0:
[text, reply] = shared.history['internal'][-1] [text, reply] = shared.history['internal'][-1]
[visible_text, visible_reply] = shared.history['visible'][-1] [visible_text, visible_reply] = shared.history['visible'][-1]
@@ -103,13 +106,13 @@ def output_modifier(string):
prosody=f'<prosody rate="{speed}" pitch="{pitch}">' prosody=f'<prosody rate="{speed}" pitch="{pitch}">'
string = '<speak>'+prosody+xmlesc(string)+'</prosody></speak>' string = '<speak>'+prosody+xmlesc(string)+'</prosody></speak>'
current_msg_id=len(shared.history['visible'])#check length here, since output_modifier can run many times on the same message current_msg_id = len(shared.history['visible']) # Check length here, since output_modifier can run many times on the same message
output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{current_msg_id:06d}.wav') output_file = Path(f'extensions/silero_tts/outputs/{shared.character}_{current_msg_id:06d}.wav')
if not shared.still_streaming and not silent_string: if not shared.still_streaming and not silent_string:
model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file)) model.save_wav(ssml_text=string, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
string = f'<audio id="audio_{current_msg_id:06d}" src="file/{output_file.as_posix()}" controls autoplay></audio>\n\n' string = f'<audio id="audio_{current_msg_id:06d}" src="file/{output_file.as_posix()}" controls autoplay></audio>\n\n'
else: else:
#placeholder so text doesn't shift around so much # Placeholder so text doesn't shift around so much
string = '<audio controls></audio>\n\n' string = '<audio controls></audio>\n\n'
if params['show_text']: if params['show_text']: