commit b83034a41d
parent f35726c2fb
Author: thxCode <thxcode0824@gmail.com>
Date:   2025-01-26 15:13:06 +08:00

    rpc: fix register position

    Signed-off-by: thxCode <thxcode0824@gmail.com>

4 changed files with 12 additions and 6 deletions

@@ -403,7 +403,7 @@ static void add_rpc_devices(std::string servers) {
     for (const auto & server : rpc_servers) {
         ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
         if (dev) {
-            ggml_backend_device_register(dev);
+            ggml_backend_device_register(dev, true);
         } else {
             throw std::invalid_argument("failed to register RPC device");
         }

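Registration order is what the public enumeration API reports, so the effect of passing `true` here is directly observable. A minimal sketch of such a check (the program is illustrative; the enumeration calls are the existing ggml-backend API):

```cpp
// Sketch: once add_rpc_devices() has run with this patch, RPC devices
// should enumerate before the locally discovered backends.
#include <cstdio>
#include "ggml-backend.h"

int main(void) {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        printf("%zu: %s (%s)\n", i,
               ggml_backend_dev_name(dev),
               ggml_backend_dev_description(dev));
    }
    return 0;
}
```
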
@@ -203,7 +203,7 @@ extern "C" {
     // Backend registry
     //

-    GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
+    GGML_API void ggml_backend_device_register(ggml_backend_dev_t device, bool front);

     // Backend (reg) enumeration
     GGML_API size_t ggml_backend_reg_count(void);

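Note the new parameter has no default in the C header, so out-of-tree callers must be updated. A hypothetical migrated call site, where `false` keeps the old append-at-back behavior:

```cpp
#include "ggml-backend.h"

// Hypothetical out-of-tree caller updated for the two-argument signature.
static void register_custom_device(ggml_backend_dev_t dev) {
    // was: ggml_backend_device_register(dev);
    ggml_backend_device_register(dev, /*front =*/ false); // ordering unchanged
}
```
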
@@ -210,11 +210,15 @@ struct ggml_backend_registry {
         }
     }

-    void register_device(ggml_backend_dev_t device) {
+    void register_device(ggml_backend_dev_t device, bool front = false) {
 #ifndef NDEBUG
         GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
 #endif
-        devices.push_back(device);
+        if (front) {
+            devices.insert(devices.begin(), device);
+        } else {
+            devices.push_back(device);
+        }
     }

     ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
@@ -298,8 +302,8 @@ void ggml_backend_register(ggml_backend_reg_t reg) {
     get_reg().register_backend(reg);
 }

-void ggml_backend_device_register(ggml_backend_dev_t device) {
-    get_reg().register_device(device);
+void ggml_backend_device_register(ggml_backend_dev_t device, bool front) {
+    get_reg().register_device(device, front);
 }

 // Backend (reg) enumeration

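The ordering logic above is plain `std::vector` front-insertion. A standalone sketch (illustrative device names) of what it does to device order:

```cpp
#include <cstdio>
#include <string>
#include <vector>

static std::vector<std::string> devices;

// Mirrors the registry logic: front == true makes the device index 0.
static void register_device(const std::string & dev, bool front = false) {
    if (front) {
        devices.insert(devices.begin(), dev);
    } else {
        devices.push_back(dev);
    }
}

int main(void) {
    register_device("CUDA0");
    register_device("CPU");
    register_device("RPC[192.168.1.2:50052]", /*front =*/ true);
    for (size_t i = 0; i < devices.size(); i++) {
        printf("%zu: %s\n", i, devices[i].c_str()); // RPC[...], CUDA0, CPU
    }
    return 0;
}
```
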
@@ -1303,10 +1303,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
     auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
         if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
+            LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(cpu_dev));
             return {cpu_dev, &pimpl->cpu_buft_list};
         }
         const int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + n_devices(), float(il - i_gpu_start)/act_gpu_layers) - splits.begin();
         auto * dev = devices.at(layer_gpu);
+        LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(dev));
         return {dev, &pimpl->gpu_buft_list.at(dev)};
     };
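
The `std::upper_bound` call above maps a layer to a device by comparing the layer's normalized position against cumulative split fractions. A standalone sketch with made-up splits (30%/70% across two devices):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

int main(void) {
    const std::vector<float> splits = {0.3f, 1.0f}; // cumulative split fractions
    const int i_gpu_start    = 0;
    const int act_gpu_layers = 10;
    for (int il = 0; il < act_gpu_layers; il++) {
        // first device whose cumulative fraction exceeds the layer's position
        const int layer_gpu = std::upper_bound(splits.begin(), splits.end(),
                float(il - i_gpu_start) / act_gpu_layers) - splits.begin();
        printf("layer %3d -> device %d\n", il, layer_gpu); // layers 0-2 -> 0, 3-9 -> 1
    }
    return 0;
}
```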