mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-14 06:19:02 +01:00
minor
This commit is contained in:
parent
de014bc339
commit
6eaea63e36
@ -543,7 +543,7 @@ extern "C" {
|
||||
// to an n_embd x n_layers buffer starting from layer 1.
|
||||
// il_start and il_end are the layer range the vector should apply to (both inclusive)
|
||||
// See llama_control_vector_load in common to load a control vector.
|
||||
// TODO: rename to llama_adapter_vec_apply
|
||||
// TODO: rename to llama_adapter_cvec_apply
|
||||
LLAMA_API int32_t llama_control_vector_apply(
|
||||
struct llama_context * lctx,
|
||||
const float * data,
|
||||
|
@ -9,10 +9,10 @@
|
||||
#include <vector>
|
||||
|
||||
//
|
||||
// llama_adapter_vec
|
||||
// llama_adapter_cvec
|
||||
//
|
||||
|
||||
// TODO: rename to llama_adapter_vec
|
||||
// TODO: rename to llama_adapter_cvec
|
||||
struct llama_control_vector {
|
||||
std::vector<ggml_context_ptr> ctxs;
|
||||
std::vector<ggml_backend_buffer_ptr> bufs;
|
||||
|
@ -134,6 +134,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
|
||||
/*.mem_buffer =*/ NULL,
|
||||
/*.no_alloc =*/ true,
|
||||
};
|
||||
|
||||
ggml_context_ptr ctx { ggml_init(params) };
|
||||
if (!ctx) {
|
||||
throw std::runtime_error(format("failed to create ggml context"));
|
||||
@ -147,6 +148,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
|
||||
op_tensor->src[i]->buffer = buf.get();
|
||||
}
|
||||
}
|
||||
|
||||
bool op_supported = ggml_backend_dev_supports_op(dev, op_tensor);
|
||||
|
||||
return op_supported;
|
||||
@ -161,6 +163,7 @@ static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & b
|
||||
return cur_buft;
|
||||
}
|
||||
}
|
||||
|
||||
throw std::runtime_error(format("no suitable buffer type found"));
|
||||
}
|
||||
|
||||
|
@ -334,6 +334,7 @@ struct llama_model {
|
||||
ggml_backend_dev_t dev;
|
||||
buft_list_t * buft_list;
|
||||
};
|
||||
|
||||
layer_dev dev_input = {};
|
||||
layer_dev dev_output = {};
|
||||
std::vector<layer_dev> dev_layer;
|
||||
@ -348,7 +349,6 @@ struct llama_model {
|
||||
llama_mmaps mappings;
|
||||
|
||||
// objects representing data potentially being locked in memory
|
||||
// TODO: should these be part of llama_context instead?
|
||||
llama_mlocks mlock_bufs;
|
||||
llama_mlocks mlock_mmaps;
|
||||
|
||||
@ -371,7 +371,7 @@ std::string llama_model_arch_name (const llama_model & model);
|
||||
std::string llama_model_type_name (const llama_model & model);
|
||||
std::string llama_model_ftype_name(const llama_model & model);
|
||||
|
||||
// used by llama_adapter_vec
|
||||
// used by llama_adapter_cvec
|
||||
ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il);
|
||||
|
||||
// used by llama_adapter_lora
|
||||
|
Loading…
Reference in New Issue
Block a user