context : move adapter code in the implementation [no ci]

2025-01-30 22:03:03 +01:00 · 2025-01-17 12:41:16 +02:00 · 2025-01-17 12:41:16 +02:00 · 2f6d767fc5
commit 2f6d767fc5
parent 587384e8f2
2 changed files with 42 additions and 35 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1788,6 +1788,43 @@ float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id
    return it->second.data();
 }

+// llama adapter API
+
+int32_t llama_set_adapter_lora(
+            struct llama_context * ctx,
+            struct llama_adapter_lora * adapter,
+            float scale) {
+    ctx->loras[adapter] = scale;
+    return 0;
+}
+
+int32_t llama_rm_adapter_lora(
+            struct llama_context * ctx,
+            struct llama_adapter_lora * adapter) {
+    auto pos = ctx->loras.find(adapter);
+    if (pos != ctx->loras.end()) {
+        ctx->loras.erase(pos);
+        return 0;
+    }
+
+    return -1;
+}
+
+void llama_clear_adapter_lora(struct llama_context * ctx) {
+    ctx->loras.clear();
+}
+
+int32_t llama_apply_adapter_cvec(
+        struct llama_context * ctx,
+                 const float * data,
+                      size_t   len,
+                     int32_t   n_embd,
+                     int32_t   il_start,
+                     int32_t   il_end) {
+    return ctx->cvec.apply(ctx->model, data, len, n_embd, il_start, il_end);
+}
+
+
 // llama state API

 // deprecated
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -8322,40 +8322,6 @@ static int llama_encode_impl(
    return 0;
 }

-int32_t llama_set_adapter_lora(
-            struct llama_context * ctx,
-            struct llama_adapter_lora * adapter,
-            float scale) {
-    ctx->loras[adapter] = scale;
-    return 0;
-}
-
-int32_t llama_rm_adapter_lora(
-            struct llama_context * ctx,
-            struct llama_adapter_lora * adapter) {
-    auto pos = ctx->loras.find(adapter);
-    if (pos != ctx->loras.end()) {
-        ctx->loras.erase(pos);
-        return 0;
-    }
-
-    return -1;
-}
-
-void llama_clear_adapter_lora(struct llama_context * ctx) {
-    ctx->loras.clear();
-}
-
-int32_t llama_apply_adapter_cvec(
-        struct llama_context * ctx,
-                 const float * data,
-                      size_t   len,
-                     int32_t   n_embd,
-                     int32_t   il_start,
-                     int32_t   il_end) {
-    return ctx->cvec.apply(ctx->model, data, len, n_embd, il_start, il_end);
-}
-
 //
 // interface implementation
 //
@@ -8914,7 +8880,7 @@ struct llama_context * llama_new_context_with_model(
 }

 //
-// kv cache
+// kv cache view
 //

 struct llama_kv_cache_view llama_kv_cache_view_init(const llama_context * ctx, int32_t n_seq_max) {
@@ -8925,6 +8891,10 @@ void llama_kv_cache_view_update(const llama_context * ctx, llama_kv_cache_view *
    llama_kv_cache_view_update(view, ctx->kv_self);
 }

+//
+// kv cache
+//
+
 // deprecated
 int32_t llama_get_kv_cache_token_count(const llama_context * ctx) {
    return llama_kv_self_n_tokens(ctx);