text-generation-webui/extensions/elevenlabs_tts/script.py

import re
from pathlib import Path

import elevenlabs
import gradio as gr
from modules import chat, shared

# Runtime settings for the extension. The Gradio callbacks registered in ui()
# write to this dict and the modifier hooks below read from it.
params = {
    'activate': True,  # when False, output_modifier() returns replies untouched
    'api_key': None,  # ElevenLabs API key typed into the "API Key" textbox
    'selected_voice': 'None',  # voice name handed to elevenlabs.generate()
    'autoplay': False,  # adds the `autoplay` attribute to newly created <audio> tags
    'show_text': True,  # appends the reply text after the <audio> tag
}

# Voice names cached by ui(); None until the first fetch.
voices = None
# Counter used to name output files (000000.mp3, 000001.mp3, ...); incremented
# by output_modifier() after each successful generation.
wav_idx = 0


def refresh_voices():
    """Return the names of the voices ElevenLabs reports for the configured API key."""
    # `params` is only read here, so no `global` declaration is required.
    available = elevenlabs.voices(api_key=params['api_key'])
    return [entry.name for entry in available]


def refresh_voices_dd():
    """
    Re-fetch the voice list and build the update for the voice dropdown.

    Returns a `gr.Dropdown.update` whose choices are the fetched voice names
    and whose value is the first name, or None when no voices were returned.
    """
    all_voices = refresh_voices()
    # Indexing [0] on an empty list would raise IndexError and break the
    # Refresh button; fall back to an empty selection instead.
    first_voice = all_voices[0] if all_voices else None
    return gr.Dropdown.update(value=first_voice, choices=all_voices)


def remove_tts_from_history():
    """Overwrite every visible reply with the corresponding internal reply text."""
    visible = shared.history['visible']
    for idx, internal_entry in enumerate(shared.history['internal']):
        # Keep the first element of the visible row, swap in the internal reply.
        first_element = visible[idx][0]
        visible[idx] = [first_element, internal_entry[1]]


def toggle_text_in_history():
    """
    Show or hide the reply text after the audio player in the visible history.

    Only rows whose visible reply starts with '<audio' are rewritten. With
    params['show_text'] set, the internal reply is appended after the audio
    tag; otherwise the row is reduced to the audio tag alone.
    """
    visible = shared.history['visible']
    for idx, row in enumerate(visible):
        shown = row[1]
        if not shown.startswith('<audio'):
            continue

        # Everything up to the first closing tag, with the tag re-attached.
        audio_tag = f"{shown.split('</audio>')[0]}</audio>"
        if params['show_text']:
            reply_text = shared.history['internal'][idx][1]
            visible[idx] = [row[0], f"{audio_tag}\n\n{reply_text}"]
        else:
            visible[idx] = [row[0], audio_tag]


def remove_surrounded_chars(string):
    """
    Strip asterisk-delimited spans from `string` and return the result.

    A span runs from an asterisk to the next asterisk, or to the end of the
    string when no closing asterisk follows.
    """
    # this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR
    # 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'
    # Raw string: '\*' in a normal literal is an invalid escape sequence and
    # triggers a DeprecationWarning/SyntaxWarning on modern Python versions.
    return re.sub(r'\*[^\*]*?(\*|$)', '', string)


def state_modifier(state):
    """Set the 'stream' entry of the generation state to False and return the state."""
    state.update(stream=False)
    return state


def input_modifier(string):
    """
    This function is applied to your text inputs before
    they are fed into the model.

    The text is returned unchanged. As side effects, the autoplay attribute is
    removed from the last visible reply and, when TTS is active, the
    processing message is switched to the "recording" text.
    """
    if shared.is_chat() and len(shared.history['internal']) > 0:
        visible = shared.history['visible']
        last_row = visible[-1]
        # Remove autoplay from the last reply
        reply_without_autoplay = last_row[1].replace('controls autoplay>', 'controls>')
        visible[-1] = [last_row[0], reply_without_autoplay]

    if params['activate']:
        shared.processing_message = "*Is recording a voice message...*"

    return string


def output_modifier(string):
    """
    This function is applied to the model outputs.

    When TTS is active, the reply is cleaned up (asterisk-delimited spans,
    double quotes and newlines removed), sent to ElevenLabs, and saved as an
    mp3 under extensions/elevenlabs_tts/outputs/. The returned string is an
    <audio> tag pointing at that file, or an error message if the ElevenLabs
    call fails. With params['show_text'] set, the original reply is appended.
    When TTS is inactive, the reply is returned untouched.
    """
    global wav_idx

    if not params['activate']:
        return string

    original_string = string
    string = remove_surrounded_chars(string)
    string = string.replace('"', '')
    string = string.replace('“', '')
    string = string.replace('\n', ' ')
    string = string.strip()
    if string == '':
        string = 'empty reply, try regenerating'

    # The f-string already interpolates wav_idx; no extra .format() call needed.
    output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3')
    print(f'Outputing audio to {str(output_file)}')
    # Make sure the target directory exists so saving cannot fail on a fresh checkout.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    try:
        # Pass the key entered in the UI, as refresh_voices() does; an empty
        # textbox value is normalised to None so the library default applies.
        audio = elevenlabs.generate(
            text=string,
            api_key=params['api_key'] or None,
            voice=params['selected_voice'],
            model="eleven_monolingual_v1",
        )
        elevenlabs.save(audio, str(output_file))

        autoplay = 'autoplay' if params['autoplay'] else ''
        string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
        # Only advance the counter once a file has actually been written.
        wav_idx += 1
    except elevenlabs.api.error.UnauthenticatedRateLimitError:
        string = "🤖 ElevenLabs Unauthenticated Rate Limit Reached - Please create an API key to continue\n\n"
    except elevenlabs.api.error.RateLimitError:
        string = "🤖 ElevenLabs API Tier Limit Reached\n\n"
    except elevenlabs.api.error.APIError as err:
        string = f"🤖 ElevenLabs Error: {err}\n\n"

    if params['show_text']:
        string += f'\n\n{original_string}'

    shared.processing_message = "*Is typing...*"
    return string


def ui():
    """
    Build the extension's Gradio controls and wire up their event handlers.

    On the first call the voice list is fetched and cached in the module-level
    `voices`, and the first voice becomes the selected one.
    """
    global voices
    if not voices:
        voices = refresh_voices()
        # The API may return no voices; keep the current selection in that case
        # instead of crashing on voices[0].
        if voices:
            params['selected_voice'] = voices[0]

    def show_confirmation():
        # Order matches convert_arr: confirm, convert, cancel.
        return [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)]

    def hide_confirmation():
        # Order matches convert_arr: confirm, convert, cancel.
        return [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)]

    # Gradio elements
    with gr.Row():
        activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
        autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')
        show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')

    with gr.Row():
        voice = gr.Dropdown(value=params['selected_voice'], choices=voices, label='TTS Voice')
        refresh = gr.Button(value='Refresh')

    with gr.Row():
        api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key')

    with gr.Row():
        convert = gr.Button('Permanently replace audios with the message texts')
        convert_cancel = gr.Button('Cancel', visible=False)
        convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)

    # Convert history with confirmation
    convert_arr = [convert_confirm, convert, convert_cancel]
    convert.click(show_confirmation, None, convert_arr)
    convert_confirm.click(
        hide_confirmation, None, convert_arr).then(
        remove_tts_from_history, None, None).then(
        chat.save_history, shared.gradio['mode'], None, show_progress=False).then(
        chat.redraw_html, shared.reload_inputs, shared.gradio['display'])

    # Registered once: a second identical registration would run the handler twice per click.
    convert_cancel.click(hide_confirmation, None, convert_arr)

    # Toggle message text in history
    show_text.change(
        lambda x: params.update({"show_text": x}), show_text, None).then(
        toggle_text_in_history, None, None).then(
        chat.save_history, shared.gradio['mode'], None, show_progress=False).then(
        chat.redraw_html, shared.reload_inputs, shared.gradio['display'])

    # Event functions to update the parameters in the backend
    activate.change(lambda x: params.update({'activate': x}), activate, None)
    voice.change(lambda x: params.update({'selected_voice': x}), voice, None)
    api_key.change(lambda x: params.update({'api_key': x}), api_key, None)
    autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)
    refresh.click(refresh_voices_dd, [], voice)
Minor changes 2023-03-22 19:55:03 +01:00			`import re`
Rename the folder 2023-03-06 23:38:36 +01:00			`from pathlib import Path`
Move new extension to a separate file 2023-03-06 23:28:53 +01:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`import elevenlabs`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00			`import gradio as gr`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`from modules import chat, shared`
Sort imports 2023-04-07 19:42:03 +02:00
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`params = {`
			`'activate': True,`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`'api_key': None,`
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`'selected_voice': 'None',`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`'autoplay': False,`
			`'show_text': True,`
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`}`
Improve readability 2023-03-06 23:46:46 +01:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`voices = None`
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`wav_idx = 0`


Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`def refresh_voices():`
			`global params`
			`your_voices = elevenlabs.voices(api_key=params['api_key'])`
			`voice_names = [voice.name for voice in your_voices]`
			`return voice_names`
Make the code more like PEP8 for readability (#862) 2023-04-07 05:15:45 +02:00

Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`def refresh_voices_dd():`
			`all_voices = refresh_voices()`
			`return gr.Dropdown.update(value=all_voices[0], choices=all_voices)`

Make the code more like PEP8 for readability (#862) 2023-04-07 05:15:45 +02:00
Refactor chat functions (#2003) 2023-05-11 20:37:04 +02:00			`def remove_tts_from_history():`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`for i, entry in enumerate(shared.history['internal']):`
			`shared.history['visible'][i] = [shared.history['visible'][i][0], entry[1]]`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00
Refactor chat functions (#2003) 2023-05-11 20:37:04 +02:00			`def toggle_text_in_history():`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`for i, entry in enumerate(shared.history['visible']):`
			`visible_reply = entry[1]`
			`if visible_reply.startswith('<audio'):`
			`if params['show_text']:`
			`reply = shared.history['internal'][i][1]`
			`shared.history['visible'][i] = [`
			`shared.history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>\n\n{reply}"`
			`]`
			`else:`
			`shared.history['visible'][i] = [`
			`shared.history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>"`
			`]`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00
Make the code more like PEP8 for readability (#862) 2023-04-07 05:15:45 +02:00
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`def remove_surrounded_chars(string):`
Extensions performance & memory optimisations Reworked remove_surrounded_chars() to use regular expression ( https://regexr.com/7alb5 ) instead of repeated string concatenations for elevenlab_tts, silero_tts, sd_api_pictures. This should be both faster and more robust in handling asterisks. Reduced the memory footprint of send_pictures and sd_api_pictures by scaling the images in the chat to 300 pixels max-side wise. (The user already has the original in case of the sent picture and there's an option to save the SD generation). This should fix history growing annoyingly large with multiple pictures present 2023-03-22 05:47:54 +01:00			`# this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR`
			`# 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'`
Make the code more like PEP8 for readability (#862) 2023-04-07 05:15:45 +02:00			`return re.sub('\*[^\*]*?(\*|$)', '', string)`

Move new extension to a separate file 2023-03-06 23:28:53 +01:00
Refactor text_generation.py, add support for custom generation functions (#1817) 2023-05-05 23:53:03 +02:00			`def state_modifier(state):`
			`state['stream'] = False`
			`return state`


Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`def input_modifier(string):`
			`"""`
			`This function is applied to your text inputs before`
			`they are fed into the model.`
			`"""`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`# Remove autoplay from the last reply`
			`if shared.is_chat() and len(shared.history['internal']) > 0:`
			`shared.history['visible'][-1] = [`
			`shared.history['visible'][-1][0],`
			`shared.history['visible'][-1][1].replace('controls autoplay>', 'controls>')`
			`]`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`if params['activate']:`
			`shared.processing_message = "*Is recording a voice message...*"`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`return string`

Make the code more like PEP8 for readability (#862) 2023-04-07 05:15:45 +02:00
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`def output_modifier(string):`
			`"""`
			`This function is applied to the model outputs.`
			`"""`
Improve readability 2023-03-06 23:46:46 +01:00
Remove unused variable 2023-05-06 16:03:12 +02:00			`global params, wav_idx`
Make the code more like PEP8 for readability (#862) 2023-04-07 05:15:45 +02:00
			`if not params['activate']:`
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`return string`

Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`original_string = string`
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`string = remove_surrounded_chars(string)`
			`string = string.replace('"', '')`
			`string = string.replace('“', '')`
			`string = string.replace('\n', ' ')`
			`string = string.strip()`
			`if string == '':`
			`string = 'empty reply, try regenerating'`
Make the code more like PEP8 for readability (#862) 2023-04-07 05:15:45 +02:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3'.format(wav_idx))`
			`print(f'Outputing audio to {str(output_file)}')`
			`try:`
			`audio = elevenlabs.generate(text=string, voice=params['selected_voice'], model="eleven_monolingual_v1")`
			`elevenlabs.save(audio, str(output_file))`

			`autoplay = 'autoplay' if params['autoplay'] else ''`
			`string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'`
			`wav_idx += 1`
			`except elevenlabs.api.error.UnauthenticatedRateLimitError:`
			`string = "🤖 ElevenLabs Unauthenticated Rate Limit Reached - Please create an API key to continue\n\n"`
			`except elevenlabs.api.error.RateLimitError:`
			`string = "🤖 ElevenLabs API Tier Limit Reached\n\n"`
			`except elevenlabs.api.error.APIError as err:`
			`string = f"🤖 ElevenLabs Error: {err}\n\n"`

			`if params['show_text']:`
			`string += f'\n\n{original_string}'`

			`shared.processing_message = "*Is typing...*"`
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`return string`

Make the code more like PEP8 for readability (#862) 2023-04-07 05:15:45 +02:00
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`def ui():`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`global voices`
			`if not voices:`
			`voices = refresh_voices()`
			`params['selected_voice'] = voices[0]`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`# Gradio elements`
			`with gr.Row():`
			`activate = gr.Checkbox(value=params['activate'], label='Activate TTS')`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')`
			`show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`with gr.Row():`
			`voice = gr.Dropdown(value=params['selected_voice'], choices=voices, label='TTS Voice')`
			`refresh = gr.Button(value='Refresh')`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`with gr.Row():`
			`api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key')`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`with gr.Row():`
			`convert = gr.Button('Permanently replace audios with the message texts')`
			`convert_cancel = gr.Button('Cancel', visible=False)`
			`convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)`

			`# Convert history with confirmation`
			`convert_arr = [convert_confirm, convert, convert_cancel]`
Refactor chat functions (#2003) 2023-05-11 20:37:04 +02:00			`convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`convert_confirm.click(`
Refactor chat functions (#2003) 2023-05-11 20:37:04 +02:00			`lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(`
			`remove_tts_from_history, None, None).then(`
			`chat.save_history, shared.gradio['mode'], None, show_progress=False).then(`
			`chat.redraw_html, shared.reload_inputs, shared.gradio['display'])`

			`convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)`

			`# Toggle message text in history`
			`show_text.change(`
			`lambda x: params.update({"show_text": x}), show_text, None).then(`
			`toggle_text_in_history, None, None).then(`
			`chat.save_history, shared.gradio['mode'], None, show_progress=False).then(`
			`chat.redraw_html, shared.reload_inputs, shared.gradio['display'])`

			`convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)`
Minor fixes to elevenlabs_tts 2023-05-06 15:57:34 +02:00
Move new extension to a separate file 2023-03-06 23:28:53 +01:00			`# Event functions to update the parameters in the backend`
			`activate.change(lambda x: params.update({'activate': x}), activate, None)`
			`voice.change(lambda x: params.update({'selected_voice': x}), voice, None)`
			`api_key.change(lambda x: params.update({'api_key': x}), api_key, None)`
Elevenlabs Extension Improvement and migration to official API (#1830) 2023-05-06 15:56:31 +02:00			`# connect.click(check_valid_api, [], connection_status)`
			`refresh.click(refresh_voices_dd, [], voice)`
			`# Event functions to update the parameters in the backend`
			`autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)`