add llm_build_mm
commit f6d090d7de
parent b88ce0f892
@@ -2063,14 +2063,14 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
         const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]);
         float lora_scale = std::get<1>(params.lora_adapter[i]);
-        auto adapter = llama_lora_adapter_init(lctx, lora_adapter.c_str(), lora_scale);
+        auto adapter = llama_lora_adapter_init(model, lora_adapter.c_str());
         if (adapter == nullptr) {
             fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
             llama_free(lctx);
             llama_free_model(model);
             return std::make_tuple(nullptr, nullptr);
         }
-        llama_lora_adapter_apply(lctx, adapter);
+        llama_lora_adapter_set(lctx, adapter, lora_scale);
     }

     if (params.ignore_eos) {
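The call-site change captures the new split of responsibilities: the adapter file is now loaded once against the model, and the per-adapter scale is supplied only when the adapter is attached to a context. Below is a minimal sketch of the same pattern outside of common.cpp, assuming a llama_model * and llama_context * already exist; the helper name and adapter list are illustrative, and only the llama_lora_adapter_* calls come from this diff.

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

#include "llama.h"

// Illustrative helper: load each (path, scale) pair as a LoRA adapter and
// attach it to the given context.
static void attach_lora_adapters(
        llama_model * model,
        llama_context * ctx,
        const std::vector<std::pair<std::string, float>> & adapters) {
    for (const auto & it : adapters) {
        const std::string & path  = it.first;
        const float        scale = it.second;

        // the adapter is loaded once and owned by the model
        llama_lora_adapter * adapter = llama_lora_adapter_init(model, path.c_str());
        if (adapter == nullptr) {
            fprintf(stderr, "failed to load lora adapter: %s\n", path.c_str());
            continue;
        }
        // the scale is applied per context; the base weights are not modified
        llama_lora_adapter_set(ctx, adapter, scale);
    }
}

Because llama_lora_adapter_set takes the context and the scale, the same adapter handle can presumably be attached to several contexts with different scales.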
@@ -19339,7 +19339,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph

     fprintf(fp, "digraph G {\n");
     fprintf(fp, "  newrank = true;\n");
-    fprintf(fp, "  rankdir = LR;\n");
+    fprintf(fp, "  rankdir = TB;\n");

     for (int i = 0; i < gb->n_nodes; i++) {
         struct ggml_tensor * node = gb->nodes[i];
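This hunk only changes the Graphviz prologue that ggml_graph_dump_dot writes, switching the layout direction of the dumped graph from left-to-right (rankdir = LR) to top-to-bottom (rankdir = TB). For reference, a small sketch of producing such a dump for a toy graph; the scratch buffer size, tensor shapes, and output file name are arbitrary choices for the example.

#include "ggml.h"

int main(void) {
    // small scratch context for a toy graph (size chosen arbitrarily)
    struct ggml_init_params params = { 16u * 1024 * 1024, NULL, false };
    struct ggml_context * ctx = ggml_init(params);

    // build a trivial graph: c = a + b
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * c = ggml_add(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    // writes "digraph G { ... rankdir = TB; ... }" after this change
    ggml_graph_dump_dot(gf, NULL, "graph.dot");

    ggml_free(ctx);
    return 0;
}

The resulting file can be rendered with Graphviz, e.g. dot -Tpng graph.dot -o graph.png.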
@@ -508,19 +508,29 @@ extern "C" {
             const char * fname_out,
             const llama_model_quantize_params * params);

-    // Apply a LoRA adapter to a loaded model
-    // path_base_model is the path to a higher quality model to use as a base for
-    // the layers modified by the adapter. Can be NULL to use the current loaded model.
-    // The model needs to be reloaded before applying a new adapter, otherwise the adapter
-    // will be applied on top of the previous one
+    // Load a LoRA adapter from file
+    // The loaded adapter will be associated to the given model, and will be free when the model is deleted
     LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
-            struct llama_context * ctx,
-            const char * path_lora,
-            float scale);
-    LLAMA_API int32_t llama_lora_adapter_apply(
-            struct llama_context * ctx,
-            struct llama_lora_adapter * adapter);
+            struct llama_model * model,
+            const char * path_lora);
+
+    // Add a loaded LoRA adapter to given context
+    // This will not modify model's weight
+    LLAMA_API int32_t llama_lora_adapter_set(
+            struct llama_context * ctx,
+            struct llama_lora_adapter * adapter,
+            float scale);
+
+    // Remove a LoRA adapter from given context
+    // Return -1 if the adapter is not present in the context
+    LLAMA_API int32_t llama_lora_adapter_remove(
+            struct llama_context * ctx,
+            struct llama_lora_adapter * adapter);
+
+    // Manually free a LoRA adapter
+    // Note: loaded adapters will be free when the associated model is deleted
+    LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);

     // Apply a loaded control vector to a llama_context, or if data is NULL, clear
     // the currently loaded vector.
     // n_embd should be the size of a single layer's control, and data should point
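Taken together, the declarations above describe a handle-based lifecycle: llama_lora_adapter_init loads the adapter against a model, llama_lora_adapter_set and llama_lora_adapter_remove attach and detach it per context, and llama_lora_adapter_free releases it early (otherwise it is freed together with the model). A rough usage sketch, assuming the model and context already exist, that calling llama_lora_adapter_set again updates the scale of an already-attached adapter, and with a placeholder adapter file name:

#include <cstdio>

#include "llama.h"

static void lora_lifecycle_demo(llama_model * model, llama_context * ctx) {
    // load the adapter; it is associated with (and freed with) the model
    llama_lora_adapter * adapter = llama_lora_adapter_init(model, "my-adapter.gguf");
    if (adapter == nullptr) {
        fprintf(stderr, "failed to load adapter\n");
        return;
    }

    // enable the adapter for this context at full strength
    llama_lora_adapter_set(ctx, adapter, 1.0f);

    // ... run some generations ...

    // assumed: calling set again adjusts the scale without touching model weights
    llama_lora_adapter_set(ctx, adapter, 0.3f);

    // detach from the context; returns -1 if the adapter was not attached
    llama_lora_adapter_remove(ctx, adapter);

    // optional early cleanup instead of waiting for the model to be freed
    llama_lora_adapter_free(adapter);
}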
src/llama.cpp (469 changes): file diff suppressed because it is too large.