diff --git a/common/arg.cpp b/common/arg.cpp
index a6226a34b..c82a335a0 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -403,7 +403,7 @@ static void add_rpc_devices(std::string servers) {
     for (const auto & server : rpc_servers) {
         ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
         if (dev) {
-            ggml_backend_device_register(dev);
+            ggml_backend_device_register(dev, true);
         } else {
             throw std::invalid_argument("failed to register RPC device");
         }
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index fc9571c82..3b95430ea 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -203,7 +203,7 @@ extern "C" {
     // Backend registry
     //
 
-    GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
+    GGML_API void ggml_backend_device_register(ggml_backend_dev_t device, bool front);
 
     // Backend (reg) enumeration
     GGML_API size_t ggml_backend_reg_count(void);
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 955ed505f..f536f59fc 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -210,11 +210,15 @@ struct ggml_backend_registry {
         }
     }
 
-    void register_device(ggml_backend_dev_t device) {
+    void register_device(ggml_backend_dev_t device, bool front = false) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
 #endif
-        devices.push_back(device);
+        if (front) {
+            devices.insert(devices.begin(), device);
+        } else {
+            devices.push_back(device);
+        }
     }
 
     ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
@@ -298,8 +302,8 @@ void ggml_backend_register(ggml_backend_reg_t reg) {
     get_reg().register_backend(reg);
 }
 
-void ggml_backend_device_register(ggml_backend_dev_t device) {
-    get_reg().register_device(device);
+void ggml_backend_device_register(ggml_backend_dev_t device, bool front) {
+    get_reg().register_device(device, front);
 }
 
 // Backend (reg) enumeration
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 031b4c30b..18bd0b071 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1303,10 +1303,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
     auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
         if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
+            LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(cpu_dev));
             return {cpu_dev, &pimpl->cpu_buft_list};
         }
         const int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + n_devices(), float(il - i_gpu_start)/act_gpu_layers) - splits.begin();
         auto * dev = devices.at(layer_gpu);
+        LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(dev));
         return {dev, &pimpl->gpu_buft_list.at(dev)};
     };
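
For illustration, a standalone sketch (not llama.cpp code; the device names are made up) of the ordering semantics the new front flag gives the registry: devices registered with front = true land at index 0 of the device list, ahead of local backends, while everything else keeps the old append behavior.

    // Standalone illustration of the register_device(..., front) ordering change.
    // Not llama.cpp code; device names are hypothetical.
    #include <cstdio>
    #include <string>
    #include <vector>

    static std::vector<std::string> devices;

    static void register_device(const std::string & device, bool front = false) {
        if (front) {
            devices.insert(devices.begin(), device); // new path: highest priority
        } else {
            devices.push_back(device);               // old behavior: appended last
        }
    }

    int main() {
        register_device("CUDA0");                 // local GPU, appended
        register_device("CPU");                   // CPU fallback, appended
        register_device("RPC[host:50052]", true); // RPC device, forced to the front

        // Prints: RPC[host:50052], CUDA0, CPU -- the RPC device is now the first
        // entry a consumer that prefers earlier devices would select.
        for (size_t i = 0; i < devices.size(); ++i) {
            std::printf("device %zu: %s\n", i, devices[i].c_str());
        }
        return 0;
    }

One consequence of inserting at devices.begin(): when add_rpc_devices registers several RPC servers in sequence, each with front = true, the most recently registered one ends up at index 0, so the servers appear in reverse of their order in the --rpc list.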
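
The two LLAMA_LOG_DEBUG additions in llama-model.cpp report, at debug verbosity, which device each layer is assigned to, which makes it easy to confirm that a front-registered RPC device actually receives layers. A simplified sketch of the placement logic they report, with made-up split values and device names (splits holds cumulative tensor-split fractions, and std::upper_bound maps a layer's normalized position to the first device whose cumulative share exceeds it):

    // Sketch only (not llama.cpp code): reproduces the layer -> device mapping
    // and the format of the added LLAMA_LOG_DEBUG call. Assumes i_gpu_start = 0
    // and a hypothetical 50/50 split across two devices.
    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
        const std::vector<std::string> devices = {"RPC[host:50052]", "CUDA0"};
        const std::vector<float>       splits  = {0.5f, 1.0f}; // cumulative fractions
        const int i_gpu_start    = 0;
        const int act_gpu_layers = 4;

        for (int il = 0; il < act_gpu_layers; ++il) {
            const int layer_gpu = std::upper_bound(splits.begin(), splits.end(),
                                      float(il - i_gpu_start) / act_gpu_layers) - splits.begin();
            // Same format string as the added LLAMA_LOG_DEBUG lines.
            std::printf("load_tensors: layer %3d assigned to device %s\n",
                        il, devices[layer_gpu].c_str());
        }
        return 0;
    }

With these values, layers 0-1 go to the RPC device and layers 2-3 to CUDA0, so the debug output directly shows the effect of the device ordering chosen at registration time.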