mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 13:28:50 +01:00
Honor -ngl option for Cuda offloading in llava (#3621)
This commit is contained in:
parent
2a4bcbacea
commit
11dc1091f6
@ -80,6 +80,12 @@ int main(int argc, char ** argv) {
|
||||
llama_backend_init(params.numa);
|
||||
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
model_params.n_gpu_layers = params.n_gpu_layers;
|
||||
model_params.main_gpu = params.main_gpu;
|
||||
model_params.tensor_split = params.tensor_split;
|
||||
model_params.use_mmap = params.use_mmap;
|
||||
model_params.use_mlock = params.use_mlock;
|
||||
|
||||
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
|
||||
if (model == NULL) {
|
||||
fprintf(stderr , "%s: error: unable to load model\n" , __func__);
|
||||
|
Loading…
Reference in New Issue
Block a user