Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-28 15:18:26 +01:00)
add llm_build_mm

commit f6d090d7de (parent b88ce0f892)
@@ -2063,14 +2063,14 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
         float lora_scale = std::get<1>(params.lora_adapter[i]);
-        auto adapter = llama_lora_adapter_init(lctx, lora_adapter.c_str(), lora_scale);
+        auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
         if (adapter == nullptr) {
             fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
             llama_free(lctx);
             llama_free_model(model);
             return std::make_tuple(nullptr, nullptr);
         }
-        llama_lora_adapter_apply(lctx, adapter);
+        llama_lora_adapter_set(lctx, adapter, lora_scale);
     }
 
     if (params.ignore_eos) {
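For context, a minimal, self-contained sketch of the call sequence this loop now follows with the refactored API: the adapter is loaded once against the model, then attached to a context with a scale. Only llama_lora_adapter_init and llama_lora_adapter_set come from this commit; the model/context setup, file names, and scale value are illustrative.

    // Sketch only: load a model, create a context, attach one LoRA adapter.
    #include "llama.h"
    #include <cstdio>

    int main() {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_load_model_from_file("model.gguf", mparams);
        if (model == nullptr) {
            return 1;
        }

        llama_context_params cparams = llama_context_default_params();
        llama_context * ctx = llama_new_context_with_model(model, cparams);
        if (ctx == nullptr) {
            llama_free_model(model);
            return 1;
        }

        // The adapter is loaded once, at the model level ...
        llama_lora_adapter * adapter = llama_lora_adapter_init(model, "lora.gguf");
        if (adapter == nullptr) {
            fprintf(stderr, "failed to load lora adapter\n");
            llama_free(ctx);
            llama_free_model(model);
            return 1;
        }

        // ... and attached to a context with a scale.
        llama_lora_adapter_set(ctx, adapter, 0.75f);

        // ... run inference ...

        llama_free(ctx);
        llama_free_model(model); // also frees the loaded adapter
        llama_backend_free();
        return 0;
    }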
@@ -19339,7 +19339,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename)
 
     fprintf(fp, "digraph G {\n");
     fprintf(fp, "  newrank = true;\n");
-    fprintf(fp, "  rankdir = LR;\n");
+    fprintf(fp, "  rankdir = TB;\n");
 
     for (int i = 0; i < gb->n_nodes; i++) {
         struct ggml_tensor * node = gb->nodes[i];
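The only functional change here flips the Graphviz layout of the dumped graph from left-to-right (rankdir = LR) to top-to-bottom (rankdir = TB). As a reminder of how such a dump is typically produced, a small hedged sketch follows; the call site and file name are illustrative, only ggml_graph_dump_dot itself is real API.

    #include "ggml.h"

    // Sketch only: write a built compute graph to a Graphviz .dot file.
    // With this commit the resulting graph is laid out top-to-bottom.
    static void dump_graph(const struct ggml_cgraph * gf) {
        ggml_graph_dump_dot(gf, NULL, "llama.dot");
        // Render offline, e.g.: dot -Tpng llama.dot -o llama.png
    }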
@@ -508,19 +508,29 @@ extern "C" {
             const char * fname_out,
             const llama_model_quantize_params * params);
 
-    // Apply a LoRA adapter to a loaded model
-    // path_base_model is the path to a higher quality model to use as a base for
-    // the layers modified by the adapter. Can be NULL to use the current loaded model.
-    // The model needs to be reloaded before applying a new adapter, otherwise the adapter
-    // will be applied on top of the previous one
+    // Load a LoRA adapter from file
+    // The loaded adapter will be associated to the given model, and will be free when the model is deleted
     LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
+            struct llama_model * model,
+            const char * path_lora);
+
+    // Add a loaded LoRA adapter to given context
+    // This will not modify model's weight
+    LLAMA_API int32_t llama_lora_adapter_set(
             struct llama_context * ctx,
-            const char * path_lora,
+            struct llama_lora_adapter * adapter,
             float scale);
-    LLAMA_API int32_t llama_lora_adapter_apply(
+
+    // Remove a LoRA adapter from given context
+    // Return -1 if the adapter is not present in the context
+    LLAMA_API int32_t llama_lora_adapter_remove(
             struct llama_context * ctx,
             struct llama_lora_adapter * adapter);
 
+    // Manually free a LoRA adapter
+    // Note: loaded adapters will be free when the associated model is deleted
+    LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
+
     // Apply a loaded control vector to a llama_context, or if data is NULL, clear
     // the currently loaded vector.
     // n_embd should be the size of a single layer's control, and data should point
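Taken together, the new declarations describe an adapter lifecycle that never modifies the model weights: init loads, set attaches with a scale, remove detaches, free releases. A short hedged sketch of that lifecycle follows; the assumption that calling llama_lora_adapter_set again with a new scale updates the existing attachment is mine, not stated in the header.

    #include "llama.h"

    // Sketch of the lifecycle implied by the declarations above; `ctx` and
    // `adapter` are assumed to have been created as in the earlier example.
    static void lora_lifecycle(llama_context * ctx, llama_lora_adapter * adapter) {
        // Attach with an initial scale.
        llama_lora_adapter_set(ctx, adapter, 1.0f);

        // Assumption: re-setting with a different scale adjusts the adapter's
        // contribution for this context; the model weights are never modified.
        llama_lora_adapter_set(ctx, adapter, 0.25f);

        // Detach from this context; returns -1 if the adapter is not present.
        if (llama_lora_adapter_remove(ctx, adapter) == -1) {
            // adapter was not attached to ctx
        }

        // Optional: free the adapter explicitly instead of waiting for
        // llama_free_model() to release it together with the model.
        llama_lora_adapter_free(adapter);
    }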
src/llama.cpp: 469 changes, file diff suppressed because it is too large.
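The suppressed src/llama.cpp diff is presumably where the llm_build_mm helper named in the commit title is introduced. Purely as a hypothetical illustration of what a LoRA-aware matmul wrapper in a ggml graph can look like (the name, signature, and structure below are mine, not the actual implementation; only the ggml_* calls are real API):

    #include "ggml.h"

    // Hypothetical sketch: y = W*x plus a scaled low-rank LoRA correction
    // B*(A*x). Not the actual llm_build_mm implementation.
    static struct ggml_tensor * mm_with_lora(
            struct ggml_context * ctx,
            struct ggml_tensor  * w,       // base weight matrix
            struct ggml_tensor  * cur,     // input activations
            struct ggml_tensor  * lora_a,  // LoRA A (may be NULL)
            struct ggml_tensor  * lora_b,  // LoRA B (may be NULL)
            float                 scale) { // per-adapter scale
        struct ggml_tensor * res = ggml_mul_mat(ctx, w, cur);
        if (lora_a != NULL && lora_b != NULL) {
            struct ggml_tensor * ax  = ggml_mul_mat(ctx, lora_a, cur);
            struct ggml_tensor * bax = ggml_mul_mat(ctx, lora_b, ax);
            res = ggml_add(ctx, res, ggml_scale(ctx, bax, scale));
        }
        return res;
    }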