text-generation-webui/extensions/openai/models.py

from modules import shared
from modules.logging_colors import logger
from modules.LoRA import add_lora_to_model
from modules.models import load_model, unload_model
from modules.models_settings import get_model_metadata, update_model_parameters
from modules.utils import get_available_loras, get_available_models


def get_current_model_info():
    """Report the model, LoRAs and loader currently recorded in ``shared``.

    Returns:
        A dict with the keys ``model_name``, ``lora_names`` and ``loader``,
        read from ``shared.model_name``, ``shared.lora_names`` and
        ``shared.args.loader`` respectively.
    """
    info = {}
    info['model_name'] = shared.model_name
    info['lora_names'] = shared.lora_names
    info['loader'] = shared.args.loader
    return info


def list_models():
    """Return the available model names under the ``model_names`` key.

    The first entry returned by ``get_available_models()`` is dropped
    (presumably a placeholder entry -- confirm against ``modules.utils``).
    """
    available = get_available_models()
    return {'model_names': available[1:]}


def list_dummy_models():
    """Build an OpenAI-style model list: dummy ids first, then local models.

    Returns:
        A dict ``{"object": "list", "data": [...]}`` where each element of
        ``data`` is produced by ``model_info_dict``. Two well-known OpenAI ids
        come first, followed by every name from ``get_available_models()``
        except its first entry.
    """
    local_names = get_available_models()[1:]
    # these are expected by so much, so include some here as a dummy
    dummy_names = ['gpt-3.5-turbo', 'text-embedding-ada-002']

    entries = [model_info_dict(name) for name in [*dummy_names, *local_names]]
    return {"object": "list", "data": entries}


def model_info_dict(model_name: str) -> dict:
    """Wrap a model name in a minimal OpenAI-style "model" record.

    Args:
        model_name: Identifier stored under the ``id`` key.

    Returns:
        A dict with ``id`` set to ``model_name`` and the fixed fields
        ``object="model"``, ``created=0`` and ``owned_by="user"``.
    """
    return dict(
        id=model_name,
        object="model",
        created=0,
        owned_by="user",
    )


def _load_model(data):
    """Unload the current model and load the one described by ``data``.

    Args:
        data: Mapping with a required ``model_name`` key and optional
            ``args`` and ``settings`` mappings. Keys of ``args`` that already
            exist as attributes on ``shared.args`` are applied before the
            model is loaded; keys of ``settings`` that already exist in
            ``shared.settings`` are applied after it is loaded. Unknown keys
            in either mapping are ignored.

    Raises:
        KeyError: If ``data`` has no ``model_name`` entry.
    """
    model_name = data["model_name"]
    loader_overrides = data.get("args")
    generation_overrides = data.get("settings")

    # Drop the current model, then apply the new model's stored metadata.
    unload_model()
    update_model_parameters(get_model_metadata(model_name))

    # Update shared.args with custom model loading settings
    for key in (loader_overrides or ()):
        if not hasattr(shared.args, key):
            continue
        setattr(shared.args, key, loader_overrides[key])

    shared.model, shared.tokenizer = load_model(model_name)

    # Update shared.settings with custom generation defaults
    for key in (generation_overrides or ()):
        if key not in shared.settings:
            continue

        shared.settings[key] = generation_overrides[key]
        if key == 'truncation_length':
            logger.info(f"TRUNCATION LENGTH (UPDATED): {shared.settings['truncation_length']}")
        elif key == 'instruction_template':
            logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}")


def list_loras():
    """Return the available LoRA names under the ``lora_names`` key.

    The first entry returned by ``get_available_loras()`` is dropped
    (presumably a placeholder entry -- confirm against ``modules.utils``).
    """
    available = get_available_loras()
    return {'lora_names': available[1:]}


def load_loras(lora_names):
    """Pass ``lora_names`` to ``add_lora_to_model``.

    Args:
        lora_names: Forwarded unchanged to ``add_lora_to_model``; presumably
            a list of LoRA names -- confirm against ``modules.LoRA``.
    """
    add_lora_to_model(lora_names)


def unload_all_loras():
    """Call ``add_lora_to_model`` with an empty list of LoRA names."""
    no_loras = []
    add_lora_to_model(no_loras)
Read GGUF metadata (#3873) 2023-09-11 23:49:30 +02:00			`from modules import shared`
Add more info messages for truncation / instruction template 2023-11-16 01:13:36 +01:00			`from modules.logging_colors import logger`
Add /v1/internal/lora endpoints (#4652) 2023-11-19 04:35:22 +01:00			`from modules.LoRA import add_lora_to_model`
Add /v1/internal/model/load endpoint (tentative) 2023-11-08 05:58:06 +01:00			`from modules.models import load_model, unload_model`
			`from modules.models_settings import get_model_metadata, update_model_parameters`
Add /v1/internal/lora endpoints (#4652) 2023-11-19 04:35:22 +01:00			`from modules.utils import get_available_loras, get_available_models`
extensions/openai: Major openai extension updates & fixes (#3049) * many openai updates * total reorg & cleanup. * fixups * missing import os for images * +moderations, custom_stopping_strings, more fixes * fix bugs in completion streaming * moderation fix (flagged) * updated moderation categories --------- Co-authored-by: Matthew Ashton <mashton-gitlab@zhero.org> 2023-07-11 23:50:08 +02:00
lint 2023-07-12 20:33:25 +02:00
Add /v1/internal/model-info endpoint 2023-11-08 03:59:02 +01:00			`def get_current_model_info():`
			`return {`
			`'model_name': shared.model_name,`
Add a /v1/internal/chat-prompt endpoint (#5879) 2024-04-19 05:24:46 +02:00			`'lora_names': shared.lora_names,`
			`'loader': shared.args.loader`
Add /v1/internal/model-info endpoint 2023-11-08 03:59:02 +01:00			`}`


Refactor the /v1/models endpoint 2023-11-08 04:59:27 +01:00			`def list_models():`
Add /v1/internal/lora endpoints (#4652) 2023-11-19 04:35:22 +01:00			`return {'model_names': get_available_models()[1:]}`


			`def list_dummy_models():`
Refactor the /v1/models endpoint 2023-11-08 04:59:27 +01:00			`result = {`
extensions/openai: Major openai extension updates & fixes (#3049) * many openai updates * total reorg & cleanup. * fixups * missing import os for images * +moderations, custom_stopping_strings, more fixes * fix bugs in completion streaming * moderation fix (flagged) * updated moderation categories --------- Co-authored-by: Matthew Ashton <mashton-gitlab@zhero.org> 2023-07-11 23:50:08 +02:00			`"object": "list",`
Refactor the /v1/models endpoint 2023-11-08 04:59:27 +01:00			`"data": []`
extensions/openai: Major openai extension updates & fixes (#3049) * many openai updates * total reorg & cleanup. * fixups * missing import os for images * +moderations, custom_stopping_strings, more fixes * fix bugs in completion streaming * moderation fix (flagged) * updated moderation categories --------- Co-authored-by: Matthew Ashton <mashton-gitlab@zhero.org> 2023-07-11 23:50:08 +02:00			`}`

- added available models into dummy models - changed args and settings to the get method to make it more Robust and easier to use 2024-11-18 17:15:29 +01:00			`models = get_available_models()[1:]`
Add /v1/internal/lora endpoints (#4652) 2023-11-19 04:35:22 +01:00			`# these are expected by so much, so include some here as a dummy`
			`for model in ['gpt-3.5-turbo', 'text-embedding-ada-002']:`
Refactor the /v1/models endpoint 2023-11-08 04:59:27 +01:00			`result["data"].append(model_info_dict(model))`
- added available models into dummy models - changed args and settings to the get method to make it more Robust and easier to use 2024-11-18 17:15:29 +01:00			`for model in models:`
			`result["data"].append(model_info_dict(model))`
extensions/openai: Major openai extension updates & fixes (#3049) * many openai updates * total reorg & cleanup. * fixups * missing import os for images * +moderations, custom_stopping_strings, more fixes * fix bugs in completion streaming * moderation fix (flagged) * updated moderation categories --------- Co-authored-by: Matthew Ashton <mashton-gitlab@zhero.org> 2023-07-11 23:50:08 +02:00
Refactor the /v1/models endpoint 2023-11-08 04:59:27 +01:00			`return result`
extensions/openai: Major openai extension updates & fixes (#3049) * many openai updates * total reorg & cleanup. * fixups * missing import os for images * +moderations, custom_stopping_strings, more fixes * fix bugs in completion streaming * moderation fix (flagged) * updated moderation categories --------- Co-authored-by: Matthew Ashton <mashton-gitlab@zhero.org> 2023-07-11 23:50:08 +02:00
Refactor the /v1/models endpoint 2023-11-08 04:59:27 +01:00
			`def model_info_dict(model_name: str) -> dict:`
extensions/openai: Major openai extension updates & fixes (#3049) * many openai updates * total reorg & cleanup. * fixups * missing import os for images * +moderations, custom_stopping_strings, more fixes * fix bugs in completion streaming * moderation fix (flagged) * updated moderation categories --------- Co-authored-by: Matthew Ashton <mashton-gitlab@zhero.org> 2023-07-11 23:50:08 +02:00			`return {`
			`"id": model_name,`
			`"object": "model",`
Refactor the /v1/models endpoint 2023-11-08 04:59:27 +01:00			`"created": 0,`
			`"owned_by": "user"`
extensions/openai: Major openai extension updates & fixes (#3049) * many openai updates * total reorg & cleanup. * fixups * missing import os for images * +moderations, custom_stopping_strings, more fixes * fix bugs in completion streaming * moderation fix (flagged) * updated moderation categories --------- Co-authored-by: Matthew Ashton <mashton-gitlab@zhero.org> 2023-07-11 23:50:08 +02:00			`}`
Refactor the /v1/models endpoint 2023-11-08 04:59:27 +01:00

Add /v1/internal/model/load endpoint (tentative) 2023-11-08 05:58:06 +01:00			`def _load_model(data):`
			`model_name = data["model_name"]`
- added available models into dummy models - changed args and settings to the get method to make it more Robust and easier to use 2024-11-18 17:15:29 +01:00			`args = data.get("args", None)`
			`settings = data.get("settings", None)`
Add /v1/internal/model/load endpoint (tentative) 2023-11-08 05:58:06 +01:00
			`unload_model()`
			`model_settings = get_model_metadata(model_name)`
Document /v1/internal/model/load and fix a bug 2023-11-09 02:41:12 +01:00			`update_model_parameters(model_settings)`
Add /v1/internal/model/load endpoint (tentative) 2023-11-08 05:58:06 +01:00
			`# Update shared.args with custom model loading settings`
			`if args:`
			`for k in args:`
Small bug fix in /v1/internal/model/load 2023-11-08 06:34:13 +01:00			`if hasattr(shared.args, k):`
Add /v1/internal/model/load endpoint (tentative) 2023-11-08 05:58:06 +01:00			`setattr(shared.args, k, args[k])`

			`shared.model, shared.tokenizer = load_model(model_name)`
Make /v1/embeddings functional, add request/response types 2023-11-10 16:34:27 +01:00
Add /v1/internal/model/load endpoint (tentative) 2023-11-08 05:58:06 +01:00			`# Update shared.settings with custom generation defaults`
			`if settings:`
			`for k in settings:`
			`if k in shared.settings:`
			`shared.settings[k] = settings[k]`
Add more info messages for truncation / instruction template 2023-11-16 01:13:36 +01:00			`if k == 'truncation_length':`
			`logger.info(f"TRUNCATION LENGTH (UPDATED): {shared.settings['truncation_length']}")`
			`elif k == 'instruction_template':`
			`logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}")`
Add /v1/internal/lora endpoints (#4652) 2023-11-19 04:35:22 +01:00

			`def list_loras():`
			`return {'lora_names': get_available_loras()[1:]}`


			`def load_loras(lora_names):`
			`add_lora_to_model(lora_names)`


			`def unload_all_loras():`
			`add_lora_to_model([])`