This commit is contained in:
Georgi Gerganov 2024-12-23 13:28:56 +02:00
parent de014bc339
commit 6eaea63e36
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
4 changed files with 8 additions and 5 deletions

View File

@@ -543,7 +543,7 @@ extern "C" {
// to an n_embd x n_layers buffer starting from layer 1.
// il_start and il_end are the layer range the vector should apply to (both inclusive)
// See llama_control_vector_load in common to load a control vector.
-// TODO: rename to llama_adapter_vec
+// TODO: rename to llama_adapter_cvec
LLAMA_API int32_t llama_control_vector_apply(
struct llama_context * lctx,
const float * data,

View File

@@ -9,10 +9,10 @@
#include <vector>
//
-// llama_adapter_vec
+// llama_adapter_cvec
//
-// TODO: rename to llama_adapter_vec
+// TODO: rename to llama_adapter_cvec
struct llama_control_vector {
std::vector<ggml_context_ptr> ctxs;
std::vector<ggml_backend_buffer_ptr> bufs;

View File

@@ -134,6 +134,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ true,
};
ggml_context_ptr ctx { ggml_init(params) };
if (!ctx) {
throw std::runtime_error(format("failed to create ggml context"));
@@ -147,6 +148,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
op_tensor->src[i]->buffer = buf.get();
}
}
bool op_supported = ggml_backend_dev_supports_op(dev, op_tensor);
return op_supported;
@@ -161,6 +163,7 @@ static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & b
return cur_buft;
}
}
throw std::runtime_error(format("no suitable buffer type found"));
}

View File

@@ -334,6 +334,7 @@ struct llama_model {
ggml_backend_dev_t dev;
buft_list_t * buft_list;
};
layer_dev dev_input = {};
layer_dev dev_output = {};
std::vector<layer_dev> dev_layer;
@@ -348,7 +349,6 @@ struct llama_model {
llama_mmaps mappings;
// objects representing data potentially being locked in memory
// TODO: should these be part of llama_context instead?
llama_mlocks mlock_bufs;
llama_mlocks mlock_mmaps;
@@ -371,7 +371,7 @@ std::string llama_model_arch_name (const llama_model & model);
std::string llama_model_type_name (const llama_model & model);
std::string llama_model_ftype_name(const llama_model & model);
-// used by llama_adapter_vec
+// used by llama_adapter_cvec
ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il);
// used by llama_adapter_lora