mirror of https://github.com/ggerganov/llama.cpp.git
synced 2025-01-14 06:19:02 +01:00

commit 6eaea63e36
parent de014bc339

    minor
@@ -543,7 +543,7 @@ extern "C" {
     // to an n_embd x n_layers buffer starting from layer 1.
     // il_start and il_end are the layer range the vector should apply to (both inclusive)
     // See llama_control_vector_load in common to load a control vector.
-    // TODO: rename to llama_adapter_vec_apply
+    // TODO: rename to llama_adapter_cvec_apply
     LLAMA_API int32_t llama_control_vector_apply(
             struct llama_context * lctx,
                      const float * data,
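
Only the first two parameters of the declaration survive in this hunk; at this point in the tree the signature continues with the buffer length, the embedding size, and the inclusive layer range. A minimal usage sketch under that assumption (the helper name and error handling are illustrative, not from this commit):

    #include <cstdio>
    #include <vector>
    #include "llama.h"

    // Sketch: apply a control vector to layers 1..n_layer (both inclusive).
    // `vec` is assumed to hold n_embd * n_layer floats, starting from layer 1,
    // e.g. as loaded by llama_control_vector_load in common/.
    static bool apply_cvec(llama_context * ctx, const llama_model * model,
                           const std::vector<float> & vec) {
        const int32_t n_embd  = llama_n_embd(model);
        const int32_t n_layer = llama_n_layer(model);

        const int32_t ret = llama_control_vector_apply(
                ctx, vec.data(), vec.size(), n_embd,
                /*il_start=*/1, /*il_end=*/n_layer);

        if (ret != 0) {
            fprintf(stderr, "%s: failed to apply control vector\n", __func__);
            return false;
        }

        return true;
    }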

@@ -9,10 +9,10 @@
 #include <vector>
 
 //
-// llama_adapter_vec
+// llama_adapter_cvec
 //
 
-// TODO: rename to llama_adapter_vec
+// TODO: rename to llama_adapter_cvec
 struct llama_control_vector {
     std::vector<ggml_context_ptr> ctxs;
     std::vector<ggml_backend_buffer_ptr> bufs;
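
The two members above are smart pointers rather than raw handles: `ggml_context_ptr` and `ggml_backend_buffer_ptr` are `std::unique_ptr` aliases with custom deleters (as defined in ggml-cpp.h), so each backend's context and buffer are released when the control vector is destroyed. Paraphrased, the aliases look roughly like this:

    #include <memory>
    #include "ggml.h"
    #include "ggml-backend.h"

    // unique_ptr aliases with custom deleters, along the lines of ggml-cpp.h
    struct ggml_context_deleter {
        void operator()(ggml_context * ctx) { ggml_free(ctx); }
    };
    typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;

    struct ggml_backend_buffer_deleter {
        void operator()(ggml_backend_buffer_t buf) { ggml_backend_buffer_free(buf); }
    };
    typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;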

@@ -134,6 +134,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ true,
     };
+
     ggml_context_ptr ctx { ggml_init(params) };
     if (!ctx) {
         throw std::runtime_error(format("failed to create ggml context"));
@@ -147,6 +148,7 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
             op_tensor->src[i]->buffer = buf.get();
         }
     }
+
     bool op_supported = ggml_backend_dev_supports_op(dev, op_tensor);
 
     return op_supported;
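
These two hunks only add breathing room inside `buft_supported`, but together they show its shape: the op is built in a `no_alloc` context, the op's sources are pointed at a zero-size dummy buffer so the backend sees them as allocated, and the device is then asked whether it supports the op. A hypothetical call site, assuming the truncated third parameter is a callback that builds the op tensor in the context it is given:

    // Hypothetical probe: can this (buffer type, device) pair run an F32 add
    // over n_embd elements? buft_supported owns the no_alloc context and the
    // zero-size dummy buffer; the callback only describes the op to test.
    const bool ok = buft_supported(buft, dev, [&](ggml_context * ctx) {
        ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
        ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
        return ggml_add(ctx, a, b);
    });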
@@ -161,6 +163,7 @@ static ggml_backend_buffer_type_t select_buft(const llama_model::buft_list_t & b
             return cur_buft;
         }
     }
+
     throw std::runtime_error(format("no suitable buffer type found"));
 }
 
@@ -334,6 +334,7 @@ struct llama_model {
         ggml_backend_dev_t dev;
         buft_list_t * buft_list;
     };
+
     layer_dev dev_input = {};
     layer_dev dev_output = {};
     std::vector<layer_dev> dev_layer;
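
`layer_dev` pairs each layer with the device it was assigned to and the buffer-type list to allocate from; `dev_input` and `dev_output` cover the non-repeating input and output layers, while `dev_layer` holds one entry per repeating layer. A hypothetical lookup helper to make the indexing concrete (not part of this commit):

    // Hypothetical: map a layer index to its device assignment. Here a
    // negative il selects the input layer and an il past the end the output.
    static const llama_model::layer_dev & model_dev_for(const llama_model & model, int il) {
        if (il < 0) {
            return model.dev_input;
        }
        if (il >= (int) model.dev_layer.size()) {
            return model.dev_output;
        }
        return model.dev_layer[il];
    }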
@@ -348,7 +349,6 @@ struct llama_model {
     llama_mmaps mappings;
 
     // objects representing data potentially being locked in memory
-    // TODO: should these be part of llama_context instead?
     llama_mlocks mlock_bufs;
     llama_mlocks mlock_mmaps;
 
@@ -371,7 +371,7 @@ std::string llama_model_arch_name (const llama_model & model);
 std::string llama_model_type_name (const llama_model & model);
 std::string llama_model_ftype_name(const llama_model & model);
 
-// used by llama_adapter_vec
+// used by llama_adapter_cvec
 ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il);
 
 // used by llama_adapter_lora
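
`llama_model_select_buft` ties the pieces together: for a given layer index it runs that layer's `buft_list` through `select_buft` with a representative op. A sketch of how the cvec side can use it when allocating its per-layer tensors (illustrative; `ctx_for_buft` and the `tensors` list are assumptions, not shown in this diff):

    // Sketch: give each layer's control tensor a buffer type supported by
    // the device that layer was assigned to.
    for (int il = 1; il < n_layer; il++) {
        ggml_backend_buffer_type_t buft = llama_model_select_buft(model, il);

        ggml_context * ctx = ctx_for_buft(buft); // hypothetical: one context per buft
        ggml_tensor  * t   = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);

        cvec.tensors.push_back(t); // assumed per-layer tensor list
    }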