Document /v1/internal/model/load and fix a bug

This commit is contained in:
oobabooga 2023-11-08 17:41:12 -08:00
parent 1754a3761b
commit 678fd73aef
2 changed files with 24 additions and 1 deletions

View File

@@ -46,7 +46,7 @@ def _load_model(data):
     unload_model()
     model_settings = get_model_metadata(model_name)
-    update_model_parameters(model_settings, initial=True)
+    update_model_parameters(model_settings)
     # Update shared.args with custom model loading settings
     if args:

View File

@@ -241,6 +241,29 @@ async def handle_model_info():
 @app.post("/v1/internal/model/load")
 async def handle_load_model(request_data: LoadModelRequest):
'''
This endpoint is experimental and may change in the future.
The "args" parameter can be used to modify flags like "--load-in-4bit"
or "--n-gpu-layers" before loading a model. Example:
"args": {
"load_in_4bit": true,
"n_gpu_layers": 12
}
Note that those settings will remain after loading the model. So you
may need to change them back to load a second model.
The "settings" parameter is also a dict but with keys for the
shared.settings object. It can be used to modify the default instruction
template like this:
"settings": {
"instruction_template": "Alpaca"
}
'''
     try:
         OAImodels._load_model(to_dict(request_data))
         return JSONResponse(content="OK")