commit 6eaea63e36 (parent de014bc339)
Georgi Gerganov 2024-12-23 13:28:56 +02:00
GPG Key ID: 449E073F9DC10735 (no known key found for this signature in database)

4 changed files with 8 additions and 5 deletions

@@ -543,7 +543,7 @@ extern "C" {
     // to an n_embd x n_layers buffer starting from layer 1.
     // il_start and il_end are the layer range the vector should apply to (both inclusive)
     // See llama_control_vector_load in common to load a control vector.
-    // TODO: rename to llama_adapter_vec_apply
+    // TODO: rename to llama_adapter_cvec_apply
     LLAMA_API int32_t llama_control_vector_apply(
             struct llama_context * lctx,
                      const float * data,

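For orientation, llama_control_vector_apply is the public entry point this comment documents. A minimal usage sketch follows; only the first two parameters are visible in the hunk, so the trailing ones (len, n_embd, il_start, il_end) are assumptions inferred from the surrounding comment:

    #include <vector>
    #include "llama.h"

    // Hedged sketch: apply a control vector read into `data` (n_embd x n_layers
    // floats, starting from layer 1) to all layers. The parameter list after
    // `data` is an assumption; the hunk above truncates the declaration.
    static int32_t apply_cvec(llama_context * lctx, const std::vector<float> & data,
                              int32_t n_embd, int32_t n_layers) {
        return llama_control_vector_apply(lctx, data.data(), data.size(),
                                          n_embd, /*il_start=*/1, /*il_end=*/n_layers);
    }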

@@ -9,10 +9,10 @@
 #include <vector>

 //
-// llama_adapter_vec
+// llama_adapter_cvec
 //

-// TODO: rename to llama_adapter_vec
+// TODO: rename to llama_adapter_cvec
 struct llama_control_vector {
     std::vector<ggml_context_ptr> ctxs;
     std::vector<ggml_backend_buffer_ptr> bufs;

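The ctxs and bufs members rely on ggml's smart-pointer typedefs, so the contexts and backend buffers are released automatically when the struct is destroyed. A rough sketch of what those typedefs amount to (the real definitions live in ggml-cpp.h):

    #include <memory>
    #include "ggml.h"
    #include "ggml-backend.h"

    // Approximation: unique_ptr with the matching ggml free call as the deleter.
    struct ggml_context_deleter {
        void operator()(ggml_context * ctx) const { ggml_free(ctx); }
    };
    struct ggml_backend_buffer_deleter {
        void operator()(ggml_backend_buffer_t buf) const { ggml_backend_buffer_free(buf); }
    };

    using ggml_context_ptr        = std::unique_ptr<ggml_context, ggml_context_deleter>;
    using ggml_backend_buffer_ptr = std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter>;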

@@ -134,6 +134,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
+
     ggml_context_ptr ctx { ggml_init(params) };
     if (!ctx) {
         throw std::runtime_error(format("failed to create ggml context"));
@@ -147,6 +148,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
             op_tensor->src[i]->buffer = buf.get();
         }
     }
+
     bool op_supported = ggml_backend_dev_supports_op(dev, op_tensor);

     return op_supported;
@@ -161,6 +163,7 @@ static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & b
             return cur_buft;
         }
     }
+
     throw std::runtime_error(format("no suitable buffer type found"));
 }

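The hunks above only add blank lines, but the logic they touch is the backend probe: buft_supported builds a test op inside a no_alloc scratch context, attaches a zero-size buffer of the candidate type to the op's sources, and asks the device via ggml_backend_dev_supports_op; select_buft then walks the (device, buffer type) list and returns the first match. A hedged caller sketch, assuming the truncated second parameter of select_buft is a callable that builds the test op in the scratch context (the dev_layer/buft_list members appear in the next file):

    // Hypothetical caller: pick a buffer type that can run an f32 add for layer il.
    ggml_backend_buffer_type_t buft = select_buft(
        *model.dev_layer[il].buft_list,
        [&](ggml_context * ctx) {
            // representative op for the probe
            ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
            ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
            return ggml_add(ctx, a, b);
        });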

@@ -334,6 +334,7 @@ struct llama_model {
         ggml_backend_dev_t dev;
         buft_list_t * buft_list;
     };
+
     layer_dev dev_input = {};
     layer_dev dev_output = {};
     std::vector<layer_dev> dev_layer;
@@ -348,7 +349,6 @@ struct llama_model {
     llama_mmaps mappings;

     // objects representing data potentially being locked in memory
-    // TODO: should these be part of llama_context instead?
     llama_mlocks mlock_bufs;
     llama_mlocks mlock_mmaps;
@@ -371,7 +371,7 @@ std::string llama_model_arch_name (const llama_model & model);
 std::string llama_model_type_name (const llama_model & model);
 std::string llama_model_ftype_name(const llama_model & model);

-// used by llama_adapter_vec
+// used by llama_adapter_cvec
 ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il);

 // used by llama_adapter_lora
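llama_model_select_buft packages the selection above per layer index. A small hedged usage sketch, assuming a loaded model and a valid layer index il:

    // Choose a buffer type for layer il's tensors and print its name
    // (ggml_backend_buft_name is existing ggml-backend API).
    ggml_backend_buffer_type_t buft = llama_model_select_buft(model, il);
    printf("layer %d buffer type: %s\n", il, ggml_backend_buft_name(buft));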