Document /v1/internal/model/load and fix a bug

This commit is contained in:
oobabooga 2023-11-08 17:41:12 -08:00
parent 1754a3761b
commit 678fd73aef
2 changed files with 24 additions and 1 deletions

View File

@@ -46,7 +46,7 @@ def _load_model(data):
     unload_model()
     model_settings = get_model_metadata(model_name)
-    update_model_parameters(model_settings, initial=True)
+    update_model_parameters(model_settings)
     # Update shared.args with custom model loading settings
     if args:

View File

@@ -241,6 +241,29 @@ async def handle_model_info():
 @app.post("/v1/internal/model/load")
 async def handle_load_model(request_data: LoadModelRequest):
'''
This endpoint is experimental and may change in the future.
The "args" parameter can be used to modify flags like "--load-in-4bit"
or "--n-gpu-layers" before loading a model. Example:
"args": {
"load_in_4bit": true,
"n_gpu_layers": 12
}
Note that those settings will remain after loading the model. So you
may need to change them back to load a second model.
The "settings" parameter is also a dict but with keys for the
shared.settings object. It can be used to modify the default instruction
template like this:
"settings": {
"instruction_template": "Alpaca"
}
'''
     try:
         OAImodels._load_model(to_dict(request_data))
         return JSONResponse(content="OK")