mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 22:08:55 +01:00
auto scale
This commit is contained in:
parent
703573f608
commit
42415a4874
@ -684,7 +684,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||||||
}
|
}
|
||||||
if (arg == "--lora") {
|
if (arg == "--lora") {
|
||||||
CHECK_ARG
|
CHECK_ARG
|
||||||
params.lora_adapter.emplace_back(argv[i], 1.0f);
|
params.lora_adapter.emplace_back(argv[i], 0.0f);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (arg == "--lora-scaled") {
|
if (arg == "--lora-scaled") {
|
||||||
@ -2089,6 +2089,9 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
|||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
return std::make_tuple(nullptr, nullptr);
|
return std::make_tuple(nullptr, nullptr);
|
||||||
}
|
}
|
||||||
|
if (lora_scale == 0.0f) {
|
||||||
|
lora_scale = llama_lora_adapter_get_default_scale(adapter);
|
||||||
|
}
|
||||||
llama_lora_adapter_set(lctx, adapter, lora_scale);
|
llama_lora_adapter_set(lctx, adapter, lora_scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -366,9 +366,11 @@ if __name__ == '__main__':
|
|||||||
lparams: dict[str, Any] = json.load(f)
|
lparams: dict[str, Any] = json.load(f)
|
||||||
|
|
||||||
alpha = lparams["lora_alpha"]
|
alpha = lparams["lora_alpha"]
|
||||||
|
rank = lparams["r"]
|
||||||
|
|
||||||
model_instance.gguf_writer.add_string("training.type", "finetune_lora")
|
model_instance.gguf_writer.add_string("training.type", "finetune_lora")
|
||||||
model_instance.gguf_writer.add_float32("training.lora.alpha", float(alpha))
|
model_instance.gguf_writer.add_float32("training.lora.alpha", float(alpha))
|
||||||
|
model_instance.gguf_writer.add_float32("training.lora.scale", float(alpha) / float(rank))
|
||||||
|
|
||||||
model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
|
model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
|
||||||
logger.info("Exporting model...")
|
logger.info("Exporting model...")
|
||||||
|
@ -513,12 +513,33 @@ extern "C" {
|
|||||||
const char * fname_out,
|
const char * fname_out,
|
||||||
const llama_model_quantize_params * params);
|
const llama_model_quantize_params * params);
|
||||||
|
|
||||||
|
// Apply a loaded control vector to a llama_context, or if data is NULL, clear
|
||||||
|
// the currently loaded vector.
|
||||||
|
// n_embd should be the size of a single layer's control, and data should point
|
||||||
|
// to an n_embd x n_layers buffer starting from layer 1.
|
||||||
|
// il_start and il_end are the layer range the vector should apply to (both inclusive)
|
||||||
|
// See llama_control_vector_load in common to load a control vector.
|
||||||
|
LLAMA_API int32_t llama_control_vector_apply(
|
||||||
|
struct llama_context * lctx,
|
||||||
|
const float * data,
|
||||||
|
size_t len,
|
||||||
|
int32_t n_embd,
|
||||||
|
int32_t il_start,
|
||||||
|
int32_t il_end);
|
||||||
|
|
||||||
|
//
|
||||||
|
// LoRA
|
||||||
|
//
|
||||||
|
|
||||||
// Load a LoRA adapter from file
|
// Load a LoRA adapter from file
|
||||||
// The loaded adapter will be associated with the given model, and will be freed when the model is deleted
|
// The loaded adapter will be associated with the given model, and will be freed when the model is deleted
|
||||||
LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
|
LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
|
||||||
struct llama_model * model,
|
struct llama_model * model,
|
||||||
const char * path_lora);
|
const char * path_lora);
|
||||||
|
|
||||||
|
// Get default scale of an adapter
|
||||||
|
LLAMA_API float llama_lora_adapter_get_default_scale(struct llama_lora_adapter * adapter);
|
||||||
|
|
||||||
// Add a loaded LoRA adapter to given context
|
// Add a loaded LoRA adapter to given context
|
||||||
// This will not modify model's weight
|
// This will not modify model's weight
|
||||||
LLAMA_API int32_t llama_lora_adapter_set(
|
LLAMA_API int32_t llama_lora_adapter_set(
|
||||||
@ -536,20 +557,6 @@ extern "C" {
|
|||||||
// Note: loaded adapters will be freed when the associated model is deleted
|
// Note: loaded adapters will be freed when the associated model is deleted
|
||||||
LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
|
LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
|
||||||
|
|
||||||
// Apply a loaded control vector to a llama_context, or if data is NULL, clear
|
|
||||||
// the currently loaded vector.
|
|
||||||
// n_embd should be the size of a single layer's control, and data should point
|
|
||||||
// to an n_embd x n_layers buffer starting from layer 1.
|
|
||||||
// il_start and il_end are the layer range the vector should apply to (both inclusive)
|
|
||||||
// See llama_control_vector_load in common to load a control vector.
|
|
||||||
LLAMA_API int32_t llama_control_vector_apply(
|
|
||||||
struct llama_context * lctx,
|
|
||||||
const float * data,
|
|
||||||
size_t len,
|
|
||||||
int32_t n_embd,
|
|
||||||
int32_t il_start,
|
|
||||||
int32_t il_end);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// KV cache
|
// KV cache
|
||||||
//
|
//
|
||||||
|
@ -380,6 +380,7 @@ enum llm_kv {
|
|||||||
|
|
||||||
LLM_KV_TRAINING_TYPE,
|
LLM_KV_TRAINING_TYPE,
|
||||||
LLM_KV_TRAINING_LORA_ALPHA,
|
LLM_KV_TRAINING_LORA_ALPHA,
|
||||||
|
LLM_KV_TRAINING_LORA_SCALE,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
||||||
@ -476,6 +477,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|||||||
|
|
||||||
{ LLM_KV_TRAINING_TYPE, "training.type" },
|
{ LLM_KV_TRAINING_TYPE, "training.type" },
|
||||||
{ LLM_KV_TRAINING_LORA_ALPHA, "training.lora.alpha" },
|
{ LLM_KV_TRAINING_LORA_ALPHA, "training.lora.alpha" },
|
||||||
|
{ LLM_KV_TRAINING_LORA_SCALE, "training.lora.scale" },
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LLM_KV {
|
struct LLM_KV {
|
||||||
@ -2851,6 +2853,7 @@ struct llama_lora_adapter {
|
|||||||
std::vector<ggml_backend_buffer_t> bufs;
|
std::vector<ggml_backend_buffer_t> bufs;
|
||||||
|
|
||||||
float alpha;
|
float alpha;
|
||||||
|
float scale; // default scale
|
||||||
|
|
||||||
llama_lora_adapter(struct llama_model * base_model): base_model(base_model) {
|
llama_lora_adapter(struct llama_model * base_model): base_model(base_model) {
|
||||||
base_model->lora_adapters.insert(this);
|
base_model->lora_adapters.insert(this);
|
||||||
@ -18578,7 +18581,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void llama_lora_adapter_init_internal(struct llama_model * model, const char * path_lora, struct llama_lora_adapter & adapter) {
|
static void llama_lora_adapter_init_internal(struct llama_model * model, const char * path_lora, struct llama_lora_adapter & adapter) {
|
||||||
LLAMA_LOG_INFO("%s: applying lora adapter from '%s' ...\n", __func__, path_lora);
|
LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
|
||||||
|
|
||||||
ggml_context * ctx = nullptr;
|
ggml_context * ctx = nullptr;
|
||||||
struct gguf_init_params meta_gguf_params = {
|
struct gguf_init_params meta_gguf_params = {
|
||||||
@ -18615,6 +18618,7 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
|
|||||||
}
|
}
|
||||||
|
|
||||||
adapter.alpha = get_kv_f32(llm_kv(LLM_KV_TRAINING_LORA_ALPHA));
|
adapter.alpha = get_kv_f32(llm_kv(LLM_KV_TRAINING_LORA_ALPHA));
|
||||||
|
adapter.scale = get_kv_f32(llm_kv(LLM_KV_TRAINING_LORA_SCALE));
|
||||||
}
|
}
|
||||||
|
|
||||||
int n_tensors = gguf_get_n_tensors(ctx_gguf);
|
int n_tensors = gguf_get_n_tensors(ctx_gguf);
|
||||||
@ -18749,6 +18753,10 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
|
|||||||
ggml_free(ctx);
|
ggml_free(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the adapter's default scale, i.e. the value of its `scale` member.
// In this change set that member is filled while the adapter is loaded, from the
// "training.lora.scale" GGUF key, and the common init code falls back to this
// value when the scale given on the command line is 0.0f.
// NOTE(review): `adapter` is dereferenced without a null check; callers must
// pass a valid adapter.
float llama_lora_adapter_get_default_scale(struct llama_lora_adapter * adapter) {
|
||||||
|
return adapter->scale;
|
||||||
|
}
|
||||||
|
|
||||||
int32_t llama_lora_adapter_set(
|
int32_t llama_lora_adapter_set(
|
||||||
struct llama_context * ctx,
|
struct llama_context * ctx,
|
||||||
struct llama_lora_adapter * adapter,
|
struct llama_lora_adapter * adapter,
|
||||||
|
Loading…
Reference in New Issue
Block a user