diff --git a/include/llama.h b/include/llama.h
index 05c0130e4..c34428895 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -543,7 +543,7 @@ extern "C" {
     // to an n_embd x n_layers buffer starting from layer 1.
     // il_start and il_end are the layer range the vector should apply to (both inclusive)
     // See llama_control_vector_load in common to load a control vector.
-    // TODO: rename to llama_adapter_vec_apply
+    // TODO: rename to llama_adapter_cvec_apply
     LLAMA_API int32_t llama_control_vector_apply(
             struct llama_context * lctx,
                      const float * data,
diff --git a/src/llama-adapter.h b/src/llama-adapter.h
index 7b8ce47a8..24f067db7 100644
--- a/src/llama-adapter.h
+++ b/src/llama-adapter.h
@@ -9,10 +9,10 @@
 #include <vector>
 
 //
-// llama_adapter_vec
+// llama_adapter_cvec
 //
 
-// TODO: rename to llama_adapter_vec
+// TODO: rename to llama_adapter_cvec
 struct llama_control_vector {
     std::vector<ggml_context_ptr> ctxs;
     std::vector<ggml_backend_buffer_ptr> bufs;
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index ba9a59e39..70e630633 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -134,6 +134,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
+
     ggml_context_ptr ctx { ggml_init(params) };
     if (!ctx) {
         throw std::runtime_error(format("failed to create ggml context"));
@@ -147,6 +148,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
             op_tensor->src[i]->buffer = buf.get();
         }
     }
+
     bool op_supported = ggml_backend_dev_supports_op(dev, op_tensor);
 
     return op_supported;
@@ -161,6 +163,7 @@ static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & b
             return cur_buft;
         }
     }
+
     throw std::runtime_error(format("no suitable buffer type found"));
 }
 
diff --git a/src/llama-model.h b/src/llama-model.h
index aa3ff9b0d..5123ac9a0 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -334,6 +334,7 @@ struct llama_model {
         ggml_backend_dev_t dev;
         buft_list_t * buft_list;
     };
+
     layer_dev dev_input  = {};
     layer_dev dev_output = {};
     std::vector<layer_dev> dev_layer;
@@ -348,7 +349,6 @@ struct llama_model {
     llama_mmaps mappings;
 
     // objects representing data potentially being locked in memory
-    // TODO: should these be part of llama_context instead?
     llama_mlocks mlock_bufs;
     llama_mlocks mlock_mmaps;
 
@@ -371,7 +371,7 @@ std::string llama_model_arch_name (const llama_model & model);
 std::string llama_model_type_name (const llama_model & model);
 std::string llama_model_ftype_name(const llama_model & model);
 
-// used by llama_adapter_vec
+// used by llama_adapter_cvec
 ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il);
 
 // used by llama_adapter_lora