mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-27 20:43:07 +01:00
Merge branch 'master' into convert-py
This commit is contained in:
commit
acf8f4b20f
@ -118,6 +118,7 @@ as the main playground for developing new features for the [ggml](https://github
|
|||||||
- Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
|
- Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
|
||||||
- Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
|
- Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
|
||||||
- Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)
|
- Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)
|
||||||
|
- JS/TS (llama.cpp server client): [lgrammel/modelfusion](https://modelfusion.dev/integration/model-provider/llamacpp)
|
||||||
- Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb)
|
- Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb)
|
||||||
- Rust: [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
|
- Rust: [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
|
||||||
- C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp)
|
- C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp)
|
||||||
|
@ -138,6 +138,7 @@ struct cmd_params {
|
|||||||
std::vector<int> n_threads;
|
std::vector<int> n_threads;
|
||||||
std::vector<int> n_gpu_layers;
|
std::vector<int> n_gpu_layers;
|
||||||
std::vector<int> main_gpu;
|
std::vector<int> main_gpu;
|
||||||
|
std::vector<bool> no_kv_offload;
|
||||||
std::vector<bool> mul_mat_q;
|
std::vector<bool> mul_mat_q;
|
||||||
std::vector<std::array<float, LLAMA_MAX_DEVICES>> tensor_split;
|
std::vector<std::array<float, LLAMA_MAX_DEVICES>> tensor_split;
|
||||||
int reps;
|
int reps;
|
||||||
@ -155,6 +156,7 @@ static const cmd_params cmd_params_defaults = {
|
|||||||
/* n_threads */ {get_num_physical_cores()},
|
/* n_threads */ {get_num_physical_cores()},
|
||||||
/* n_gpu_layers */ {99},
|
/* n_gpu_layers */ {99},
|
||||||
/* main_gpu */ {0},
|
/* main_gpu */ {0},
|
||||||
|
/* no_kv_offload */ {false},
|
||||||
/* mul_mat_q */ {true},
|
/* mul_mat_q */ {true},
|
||||||
/* tensor_split */ {{}},
|
/* tensor_split */ {{}},
|
||||||
/* reps */ 5,
|
/* reps */ 5,
|
||||||
@ -176,6 +178,7 @@ static void print_usage(int /* argc */, char ** argv) {
|
|||||||
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
|
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
|
||||||
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
|
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
|
||||||
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
|
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
|
||||||
|
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
|
||||||
printf(" -mmq, --mul-mat-q <0|1> (default: %s)\n", join(cmd_params_defaults.mul_mat_q, ",").c_str());
|
printf(" -mmq, --mul-mat-q <0|1> (default: %s)\n", join(cmd_params_defaults.mul_mat_q, ",").c_str());
|
||||||
printf(" -ts, --tensor_split <ts0/ts1/..> \n");
|
printf(" -ts, --tensor_split <ts0/ts1/..> \n");
|
||||||
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
||||||
@ -309,6 +312,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.main_gpu = split<int>(argv[i], split_delim);
|
params.main_gpu = split<int>(argv[i], split_delim);
|
||||||
|
} else if (arg == "-nkvo" || arg == "--no-kv-offload") {
|
||||||
|
if (++i >= argc) {
|
||||||
|
invalid_param = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
auto p = split<bool>(argv[i], split_delim);
|
||||||
|
params.no_kv_offload.insert(params.no_kv_offload.end(), p.begin(), p.end());
|
||||||
} else if (arg == "-mmq" || arg == "--mul-mat-q") {
|
} else if (arg == "-mmq" || arg == "--mul-mat-q") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
@ -383,6 +393,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||||||
if (params.type_v.empty()) { params.type_v = cmd_params_defaults.type_v; }
|
if (params.type_v.empty()) { params.type_v = cmd_params_defaults.type_v; }
|
||||||
if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; }
|
if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; }
|
||||||
if (params.main_gpu.empty()) { params.main_gpu = cmd_params_defaults.main_gpu; }
|
if (params.main_gpu.empty()) { params.main_gpu = cmd_params_defaults.main_gpu; }
|
||||||
|
if (params.no_kv_offload.empty()){ params.no_kv_offload = cmd_params_defaults.no_kv_offload; }
|
||||||
if (params.mul_mat_q.empty()) { params.mul_mat_q = cmd_params_defaults.mul_mat_q; }
|
if (params.mul_mat_q.empty()) { params.mul_mat_q = cmd_params_defaults.mul_mat_q; }
|
||||||
if (params.tensor_split.empty()) { params.tensor_split = cmd_params_defaults.tensor_split; }
|
if (params.tensor_split.empty()) { params.tensor_split = cmd_params_defaults.tensor_split; }
|
||||||
if (params.n_threads.empty()) { params.n_threads = cmd_params_defaults.n_threads; }
|
if (params.n_threads.empty()) { params.n_threads = cmd_params_defaults.n_threads; }
|
||||||
@ -400,6 +411,7 @@ struct cmd_params_instance {
|
|||||||
int n_threads;
|
int n_threads;
|
||||||
int n_gpu_layers;
|
int n_gpu_layers;
|
||||||
int main_gpu;
|
int main_gpu;
|
||||||
|
bool no_kv_offload;
|
||||||
bool mul_mat_q;
|
bool mul_mat_q;
|
||||||
std::array<float, LLAMA_MAX_DEVICES> tensor_split;
|
std::array<float, LLAMA_MAX_DEVICES> tensor_split;
|
||||||
|
|
||||||
@ -428,6 +440,7 @@ struct cmd_params_instance {
|
|||||||
cparams.type_k = type_k;
|
cparams.type_k = type_k;
|
||||||
cparams.type_v = type_v;
|
cparams.type_v = type_v;
|
||||||
cparams.mul_mat_q = mul_mat_q;
|
cparams.mul_mat_q = mul_mat_q;
|
||||||
|
cparams.offload_kqv = !no_kv_offload;
|
||||||
|
|
||||||
return cparams;
|
return cparams;
|
||||||
}
|
}
|
||||||
@ -444,6 +457,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances_int(const cmd_p
|
|||||||
for (const auto & tk : params.type_k)
|
for (const auto & tk : params.type_k)
|
||||||
for (const auto & tv : params.type_v)
|
for (const auto & tv : params.type_v)
|
||||||
for (const auto & mmq : params.mul_mat_q)
|
for (const auto & mmq : params.mul_mat_q)
|
||||||
|
for (const auto & nkvo : params.no_kv_offload)
|
||||||
for (const auto & nt : params.n_threads) {
|
for (const auto & nt : params.n_threads) {
|
||||||
cmd_params_instance instance = {
|
cmd_params_instance instance = {
|
||||||
/* .model = */ m,
|
/* .model = */ m,
|
||||||
@ -455,6 +469,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances_int(const cmd_p
|
|||||||
/* .n_threads = */ nt,
|
/* .n_threads = */ nt,
|
||||||
/* .n_gpu_layers = */ nl,
|
/* .n_gpu_layers = */ nl,
|
||||||
/* .main_gpu = */ mg,
|
/* .main_gpu = */ mg,
|
||||||
|
/* .no_kv_offload= */ nkvo,
|
||||||
/* .mul_mat_q = */ mmq,
|
/* .mul_mat_q = */ mmq,
|
||||||
/* .tensor_split = */ ts,
|
/* .tensor_split = */ ts,
|
||||||
};
|
};
|
||||||
@ -476,6 +491,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|||||||
for (const auto & tk : params.type_k)
|
for (const auto & tk : params.type_k)
|
||||||
for (const auto & tv : params.type_v)
|
for (const auto & tv : params.type_v)
|
||||||
for (const auto & mmq : params.mul_mat_q)
|
for (const auto & mmq : params.mul_mat_q)
|
||||||
|
for (const auto & nkvo : params.no_kv_offload)
|
||||||
for (const auto & nt : params.n_threads) {
|
for (const auto & nt : params.n_threads) {
|
||||||
for (const auto & n_prompt : params.n_prompt) {
|
for (const auto & n_prompt : params.n_prompt) {
|
||||||
if (n_prompt == 0) {
|
if (n_prompt == 0) {
|
||||||
@ -491,6 +507,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|||||||
/* .n_threads = */ nt,
|
/* .n_threads = */ nt,
|
||||||
/* .n_gpu_layers = */ nl,
|
/* .n_gpu_layers = */ nl,
|
||||||
/* .main_gpu = */ mg,
|
/* .main_gpu = */ mg,
|
||||||
|
/* .no_kv_offload= */ nkvo,
|
||||||
/* .mul_mat_q = */ mmq,
|
/* .mul_mat_q = */ mmq,
|
||||||
/* .tensor_split = */ ts,
|
/* .tensor_split = */ ts,
|
||||||
};
|
};
|
||||||
@ -511,6 +528,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
|||||||
/* .n_threads = */ nt,
|
/* .n_threads = */ nt,
|
||||||
/* .n_gpu_layers = */ nl,
|
/* .n_gpu_layers = */ nl,
|
||||||
/* .main_gpu = */ mg,
|
/* .main_gpu = */ mg,
|
||||||
|
/* .no_kv_offload= */ nkvo,
|
||||||
/* .mul_mat_q = */ mmq,
|
/* .mul_mat_q = */ mmq,
|
||||||
/* .tensor_split = */ ts,
|
/* .tensor_split = */ ts,
|
||||||
};
|
};
|
||||||
@ -559,6 +577,7 @@ struct test {
|
|||||||
ggml_type type_v;
|
ggml_type type_v;
|
||||||
int n_gpu_layers;
|
int n_gpu_layers;
|
||||||
int main_gpu;
|
int main_gpu;
|
||||||
|
bool no_kv_offload;
|
||||||
bool mul_mat_q;
|
bool mul_mat_q;
|
||||||
std::array<float, LLAMA_MAX_DEVICES> tensor_split;
|
std::array<float, LLAMA_MAX_DEVICES> tensor_split;
|
||||||
int n_prompt;
|
int n_prompt;
|
||||||
@ -579,6 +598,7 @@ struct test {
|
|||||||
type_v = inst.type_v;
|
type_v = inst.type_v;
|
||||||
n_gpu_layers = inst.n_gpu_layers;
|
n_gpu_layers = inst.n_gpu_layers;
|
||||||
main_gpu = inst.main_gpu;
|
main_gpu = inst.main_gpu;
|
||||||
|
no_kv_offload = inst.no_kv_offload;
|
||||||
mul_mat_q = inst.mul_mat_q;
|
mul_mat_q = inst.mul_mat_q;
|
||||||
tensor_split = inst.tensor_split;
|
tensor_split = inst.tensor_split;
|
||||||
n_prompt = inst.n_prompt;
|
n_prompt = inst.n_prompt;
|
||||||
@ -640,7 +660,8 @@ struct test {
|
|||||||
"cpu_info", "gpu_info",
|
"cpu_info", "gpu_info",
|
||||||
"model_filename", "model_type", "model_size", "model_n_params",
|
"model_filename", "model_type", "model_size", "model_n_params",
|
||||||
"n_batch", "n_threads", "type_k", "type_v",
|
"n_batch", "n_threads", "type_k", "type_v",
|
||||||
"n_gpu_layers", "main_gpu", "mul_mat_q", "tensor_split",
|
"n_gpu_layers", "main_gpu", "no_kv_offload",
|
||||||
|
"mul_mat_q", "tensor_split",
|
||||||
"n_prompt", "n_gen", "test_time",
|
"n_prompt", "n_gen", "test_time",
|
||||||
"avg_ns", "stddev_ns",
|
"avg_ns", "stddev_ns",
|
||||||
"avg_ts", "stddev_ts"
|
"avg_ts", "stddev_ts"
|
||||||
@ -659,7 +680,7 @@ struct test {
|
|||||||
return INT;
|
return INT;
|
||||||
}
|
}
|
||||||
if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas" ||
|
if (field == "cuda" || field == "opencl" || field == "metal" || field == "gpu_blas" || field == "blas" ||
|
||||||
field == "f16_kv" || field == "mul_mat_q") {
|
field == "f16_kv" || field == "no_kv_offload" || field == "mul_mat_q") {
|
||||||
return BOOL;
|
return BOOL;
|
||||||
}
|
}
|
||||||
if (field == "avg_ts" || field == "stddev_ts") {
|
if (field == "avg_ts" || field == "stddev_ts") {
|
||||||
@ -690,7 +711,8 @@ struct test {
|
|||||||
cpu_info, gpu_info,
|
cpu_info, gpu_info,
|
||||||
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
|
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
|
||||||
std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
|
std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
|
||||||
std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(mul_mat_q), tensor_split_str,
|
std::to_string(n_gpu_layers), std::to_string(main_gpu), std::to_string(no_kv_offload),
|
||||||
|
std::to_string(mul_mat_q), tensor_split_str,
|
||||||
std::to_string(n_prompt), std::to_string(n_gen), test_time,
|
std::to_string(n_prompt), std::to_string(n_gen), test_time,
|
||||||
std::to_string(avg_ns()), std::to_string(stdev_ns()),
|
std::to_string(avg_ns()), std::to_string(stdev_ns()),
|
||||||
std::to_string(avg_ts()), std::to_string(stdev_ts())
|
std::to_string(avg_ts()), std::to_string(stdev_ts())
|
||||||
@ -851,6 +873,9 @@ struct markdown_printer : public printer {
|
|||||||
if (field == "mul_mat_q") {
|
if (field == "mul_mat_q") {
|
||||||
return "mmq";
|
return "mmq";
|
||||||
}
|
}
|
||||||
|
if (field == "no_kv_offload") {
|
||||||
|
return "nkvo";
|
||||||
|
}
|
||||||
if (field == "tensor_split") {
|
if (field == "tensor_split") {
|
||||||
return "ts";
|
return "ts";
|
||||||
}
|
}
|
||||||
@ -885,6 +910,9 @@ struct markdown_printer : public printer {
|
|||||||
if (params.mul_mat_q.size() > 1 || params.mul_mat_q != cmd_params_defaults.mul_mat_q) {
|
if (params.mul_mat_q.size() > 1 || params.mul_mat_q != cmd_params_defaults.mul_mat_q) {
|
||||||
fields.push_back("mul_mat_q");
|
fields.push_back("mul_mat_q");
|
||||||
}
|
}
|
||||||
|
if (params.no_kv_offload.size() > 1 || params.no_kv_offload != cmd_params_defaults.no_kv_offload) {
|
||||||
|
fields.push_back("no_kv_offload");
|
||||||
|
}
|
||||||
if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) {
|
if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) {
|
||||||
fields.push_back("tensor_split");
|
fields.push_back("tensor_split");
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,5 @@
|
|||||||
import Foundation
|
import Foundation
|
||||||
|
import llama
|
||||||
// To use this in your own project, add llama.cpp as a swift package dependency
|
|
||||||
// and uncomment this import line.
|
|
||||||
// import llama
|
|
||||||
|
|
||||||
enum LlamaError: Error {
|
enum LlamaError: Error {
|
||||||
case couldNotInitializeContext
|
case couldNotInitializeContext
|
||||||
|
@ -1,5 +0,0 @@
|
|||||||
//
|
|
||||||
// Use this file to import your target's public headers that you would like to expose to Swift.
|
|
||||||
//
|
|
||||||
|
|
||||||
#import "llama.h"
|
|
@ -7,52 +7,31 @@
|
|||||||
objects = {
|
objects = {
|
||||||
|
|
||||||
/* Begin PBXBuildFile section */
|
/* Begin PBXBuildFile section */
|
||||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
|
||||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
|
||||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
|
|
||||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
|
||||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
|
|
||||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; };
|
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; };
|
||||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 549479C52AC9E0F200E0F78B /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc -DGGML_SWIFT -DGGML_USE_METAL -O3"; }; };
|
|
||||||
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */; };
|
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */; };
|
||||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */; };
|
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */; };
|
||||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83782AC328BD0096AF73 /* ContentView.swift */; };
|
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83782AC328BD0096AF73 /* ContentView.swift */; };
|
||||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837A2AC328BE0096AF73 /* Assets.xcassets */; };
|
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837A2AC328BE0096AF73 /* Assets.xcassets */; };
|
||||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */; };
|
|
||||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A39BE092AC7601000BFEB40 /* Accelerate.framework */; };
|
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A39BE092AC7601000BFEB40 /* Accelerate.framework */; };
|
||||||
8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
|
8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
|
||||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
|
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
|
||||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
|
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
|
||||||
F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
|
DF810E132B4A5BA200301144 /* llama in Frameworks */ = {isa = PBXBuildFile; productRef = DF810E122B4A5BA200301144 /* llama */; };
|
||||||
F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */; };
|
F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */; };
|
||||||
/* End PBXBuildFile section */
|
/* End PBXBuildFile section */
|
||||||
|
|
||||||
/* Begin PBXFileReference section */
|
/* Begin PBXFileReference section */
|
||||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = "<group>"; };
|
|
||||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../ggml-quants.c"; sourceTree = "<group>"; };
|
|
||||||
542376092B0D9C40008E6A1C /* ggml-backend.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../ggml-backend.h"; sourceTree = "<group>"; };
|
|
||||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../ggml-backend.c"; sourceTree = "<group>"; };
|
|
||||||
542EA09B2AC8723900A8AEE9 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../ggml.c; sourceTree = "<group>"; };
|
|
||||||
542EA09C2AC8723900A8AEE9 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../ggml.h; sourceTree = "<group>"; };
|
|
||||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../ggml-alloc.h"; sourceTree = "<group>"; };
|
|
||||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../ggml-alloc.c"; sourceTree = "<group>"; };
|
|
||||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = llama.cpp; path = ../../llama.cpp; sourceTree = "<group>"; };
|
|
||||||
542EA0A22AC8729100A8AEE9 /* llama.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = llama.h; path = ../../llama.h; sourceTree = "<group>"; };
|
|
||||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../ggml-metal.m"; sourceTree = "<group>"; };
|
|
||||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../ggml-metal.h"; sourceTree = "<group>"; };
|
|
||||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../ggml-metal.metal"; sourceTree = "<group>"; };
|
|
||||||
549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
|
549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
|
||||||
7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DownloadButton.swift; sourceTree = "<group>"; };
|
7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DownloadButton.swift; sourceTree = "<group>"; };
|
||||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "bridging-header.h"; sourceTree = "<group>"; };
|
|
||||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llama.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llama.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = llama_swiftuiApp.swift; sourceTree = "<group>"; };
|
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = llama_swiftuiApp.swift; sourceTree = "<group>"; };
|
||||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
8A1C83782AC328BD0096AF73 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
||||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
|
||||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
|
8A39BE092AC7601000BFEB40 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
|
||||||
8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
|
8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
|
||||||
8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
|
8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
|
||||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
|
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
|
||||||
|
DF2D2FE72B4A59BE00FCB72D /* llama.cpp */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = llama.cpp; path = ../..; sourceTree = "<group>"; };
|
||||||
F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LoadCustomButton.swift; sourceTree = "<group>"; };
|
F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LoadCustomButton.swift; sourceTree = "<group>"; };
|
||||||
/* End PBXFileReference section */
|
/* End PBXFileReference section */
|
||||||
|
|
||||||
@ -61,6 +40,7 @@
|
|||||||
isa = PBXFrameworksBuildPhase;
|
isa = PBXFrameworksBuildPhase;
|
||||||
buildActionMask = 2147483647;
|
buildActionMask = 2147483647;
|
||||||
files = (
|
files = (
|
||||||
|
DF810E132B4A5BA200301144 /* llama in Frameworks */,
|
||||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */,
|
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */,
|
||||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */,
|
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */,
|
||||||
);
|
);
|
||||||
@ -69,30 +49,10 @@
|
|||||||
/* End PBXFrameworksBuildPhase section */
|
/* End PBXFrameworksBuildPhase section */
|
||||||
|
|
||||||
/* Begin PBXGroup section */
|
/* Begin PBXGroup section */
|
||||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */ = {
|
|
||||||
isa = PBXGroup;
|
|
||||||
children = (
|
|
||||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */,
|
|
||||||
542376092B0D9C40008E6A1C /* ggml-backend.h */,
|
|
||||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */,
|
|
||||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */,
|
|
||||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */,
|
|
||||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */,
|
|
||||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */,
|
|
||||||
542EA09B2AC8723900A8AEE9 /* ggml.c */,
|
|
||||||
542EA09C2AC8723900A8AEE9 /* ggml.h */,
|
|
||||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */,
|
|
||||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */,
|
|
||||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */,
|
|
||||||
542EA0A22AC8729100A8AEE9 /* llama.h */,
|
|
||||||
);
|
|
||||||
name = llama.cpp;
|
|
||||||
sourceTree = "<group>";
|
|
||||||
};
|
|
||||||
8A1C836A2AC328BD0096AF73 = {
|
8A1C836A2AC328BD0096AF73 = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */,
|
DF2D2FE72B4A59BE00FCB72D /* llama.cpp */,
|
||||||
8A907F312AC7134E006146EA /* llama.cpp.swift */,
|
8A907F312AC7134E006146EA /* llama.cpp.swift */,
|
||||||
8A3F84232AC4C891005E2EE8 /* models */,
|
8A3F84232AC4C891005E2EE8 /* models */,
|
||||||
8A1C83752AC328BD0096AF73 /* llama.swiftui */,
|
8A1C83752AC328BD0096AF73 /* llama.swiftui */,
|
||||||
@ -117,19 +77,10 @@
|
|||||||
8A9F7C4A2AC332BF008AE1EA /* UI */,
|
8A9F7C4A2AC332BF008AE1EA /* UI */,
|
||||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */,
|
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */,
|
||||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */,
|
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */,
|
||||||
8A1C837C2AC328BE0096AF73 /* Preview Content */,
|
|
||||||
);
|
);
|
||||||
path = llama.swiftui;
|
path = llama.swiftui;
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
};
|
};
|
||||||
8A1C837C2AC328BE0096AF73 /* Preview Content */ = {
|
|
||||||
isa = PBXGroup;
|
|
||||||
children = (
|
|
||||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */,
|
|
||||||
);
|
|
||||||
path = "Preview Content";
|
|
||||||
sourceTree = "<group>";
|
|
||||||
};
|
|
||||||
8A39BE082AC7601000BFEB40 /* Frameworks */ = {
|
8A39BE082AC7601000BFEB40 /* Frameworks */ = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
@ -157,7 +108,6 @@
|
|||||||
8A907F312AC7134E006146EA /* llama.cpp.swift */ = {
|
8A907F312AC7134E006146EA /* llama.cpp.swift */ = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */,
|
|
||||||
8A907F322AC7134E006146EA /* LibLlama.swift */,
|
8A907F322AC7134E006146EA /* LibLlama.swift */,
|
||||||
);
|
);
|
||||||
path = llama.cpp.swift;
|
path = llama.cpp.swift;
|
||||||
@ -198,6 +148,7 @@
|
|||||||
);
|
);
|
||||||
name = llama.swiftui;
|
name = llama.swiftui;
|
||||||
packageProductDependencies = (
|
packageProductDependencies = (
|
||||||
|
DF810E122B4A5BA200301144 /* llama */,
|
||||||
);
|
);
|
||||||
productName = llama.swiftui;
|
productName = llama.swiftui;
|
||||||
productReference = 8A1C83732AC328BD0096AF73 /* llama.swiftui.app */;
|
productReference = 8A1C83732AC328BD0096AF73 /* llama.swiftui.app */;
|
||||||
@ -244,9 +195,7 @@
|
|||||||
isa = PBXResourcesBuildPhase;
|
isa = PBXResourcesBuildPhase;
|
||||||
buildActionMask = 2147483647;
|
buildActionMask = 2147483647;
|
||||||
files = (
|
files = (
|
||||||
F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */,
|
|
||||||
8A3F84242AC4C891005E2EE8 /* models in Resources */,
|
8A3F84242AC4C891005E2EE8 /* models in Resources */,
|
||||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */,
|
|
||||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,
|
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,
|
||||||
);
|
);
|
||||||
runOnlyForDeploymentPostprocessing = 0;
|
runOnlyForDeploymentPostprocessing = 0;
|
||||||
@ -258,18 +207,12 @@
|
|||||||
isa = PBXSourcesBuildPhase;
|
isa = PBXSourcesBuildPhase;
|
||||||
buildActionMask = 2147483647;
|
buildActionMask = 2147483647;
|
||||||
files = (
|
files = (
|
||||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */,
|
|
||||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */,
|
|
||||||
F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */,
|
F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */,
|
||||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */,
|
|
||||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */,
|
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */,
|
||||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */,
|
|
||||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */,
|
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */,
|
||||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */,
|
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */,
|
||||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */,
|
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */,
|
||||||
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */,
|
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */,
|
||||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */,
|
|
||||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */,
|
|
||||||
);
|
);
|
||||||
runOnlyForDeploymentPostprocessing = 0;
|
runOnlyForDeploymentPostprocessing = 0;
|
||||||
};
|
};
|
||||||
@ -399,11 +342,9 @@
|
|||||||
isa = XCBuildConfiguration;
|
isa = XCBuildConfiguration;
|
||||||
buildSettings = {
|
buildSettings = {
|
||||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
|
||||||
CLANG_ENABLE_MODULES = YES;
|
CLANG_ENABLE_MODULES = YES;
|
||||||
CODE_SIGN_STYLE = Automatic;
|
CODE_SIGN_STYLE = Automatic;
|
||||||
CURRENT_PROJECT_VERSION = 1;
|
CURRENT_PROJECT_VERSION = 1;
|
||||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
|
||||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||||
ENABLE_PREVIEWS = YES;
|
ENABLE_PREVIEWS = YES;
|
||||||
GENERATE_INFOPLIST_FILE = YES;
|
GENERATE_INFOPLIST_FILE = YES;
|
||||||
@ -420,11 +361,12 @@
|
|||||||
MARKETING_VERSION = 1.0;
|
MARKETING_VERSION = 1.0;
|
||||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||||
|
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator xros xrsimulator";
|
||||||
|
SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = NO;
|
||||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
|
||||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||||
SWIFT_VERSION = 5.0;
|
SWIFT_VERSION = 5.0;
|
||||||
TARGETED_DEVICE_FAMILY = "1,2";
|
TARGETED_DEVICE_FAMILY = "1,2,7";
|
||||||
};
|
};
|
||||||
name = Debug;
|
name = Debug;
|
||||||
};
|
};
|
||||||
@ -432,11 +374,9 @@
|
|||||||
isa = XCBuildConfiguration;
|
isa = XCBuildConfiguration;
|
||||||
buildSettings = {
|
buildSettings = {
|
||||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
|
||||||
CLANG_ENABLE_MODULES = YES;
|
CLANG_ENABLE_MODULES = YES;
|
||||||
CODE_SIGN_STYLE = Automatic;
|
CODE_SIGN_STYLE = Automatic;
|
||||||
CURRENT_PROJECT_VERSION = 1;
|
CURRENT_PROJECT_VERSION = 1;
|
||||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
|
||||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||||
ENABLE_PREVIEWS = YES;
|
ENABLE_PREVIEWS = YES;
|
||||||
GENERATE_INFOPLIST_FILE = YES;
|
GENERATE_INFOPLIST_FILE = YES;
|
||||||
@ -453,10 +393,11 @@
|
|||||||
MARKETING_VERSION = 1.0;
|
MARKETING_VERSION = 1.0;
|
||||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||||
|
SUPPORTED_PLATFORMS = "iphoneos iphonesimulator xros xrsimulator";
|
||||||
|
SUPPORTS_XR_DESIGNED_FOR_IPHONE_IPAD = NO;
|
||||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
|
||||||
SWIFT_VERSION = 5.0;
|
SWIFT_VERSION = 5.0;
|
||||||
TARGETED_DEVICE_FAMILY = "1,2";
|
TARGETED_DEVICE_FAMILY = "1,2,7";
|
||||||
};
|
};
|
||||||
name = Release;
|
name = Release;
|
||||||
};
|
};
|
||||||
@ -482,6 +423,13 @@
|
|||||||
defaultConfigurationName = Release;
|
defaultConfigurationName = Release;
|
||||||
};
|
};
|
||||||
/* End XCConfigurationList section */
|
/* End XCConfigurationList section */
|
||||||
|
|
||||||
|
/* Begin XCSwiftPackageProductDependency section */
|
||||||
|
DF810E122B4A5BA200301144 /* llama */ = {
|
||||||
|
isa = XCSwiftPackageProductDependency;
|
||||||
|
productName = llama;
|
||||||
|
};
|
||||||
|
/* End XCSwiftPackageProductDependency section */
|
||||||
};
|
};
|
||||||
rootObject = 8A1C836B2AC328BD0096AF73 /* Project object */;
|
rootObject = 8A1C836B2AC328BD0096AF73 /* Project object */;
|
||||||
}
|
}
|
||||||
|
@ -1,11 +0,0 @@
|
|||||||
{
|
|
||||||
"colors" : [
|
|
||||||
{
|
|
||||||
"idiom" : "universal"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"info" : {
|
|
||||||
"author" : "xcode",
|
|
||||||
"version" : 1
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,6 +0,0 @@
|
|||||||
{
|
|
||||||
"info" : {
|
|
||||||
"author" : "xcode",
|
|
||||||
"version" : 1
|
|
||||||
}
|
|
||||||
}
|
|
@ -447,8 +447,14 @@ struct llama_client_slot
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool has_budget(gpt_params &global_params) {
|
bool has_budget(gpt_params &global_params) {
|
||||||
|
if (params.n_predict == -1 && global_params.n_predict == -1)
|
||||||
|
{
|
||||||
|
return true; // limitless
|
||||||
|
}
|
||||||
|
|
||||||
n_remaining = -1;
|
n_remaining = -1;
|
||||||
if(params.n_predict != -1)
|
|
||||||
|
if (params.n_predict != -1)
|
||||||
{
|
{
|
||||||
n_remaining = params.n_predict - n_decoded;
|
n_remaining = params.n_predict - n_decoded;
|
||||||
}
|
}
|
||||||
@ -456,7 +462,8 @@ struct llama_client_slot
|
|||||||
{
|
{
|
||||||
n_remaining = global_params.n_predict - n_decoded;
|
n_remaining = global_params.n_predict - n_decoded;
|
||||||
}
|
}
|
||||||
return n_remaining > 0 || n_remaining == -1; // no budget || limitless
|
|
||||||
|
return n_remaining > 0; // no budget
|
||||||
}
|
}
|
||||||
|
|
||||||
bool available() const {
|
bool available() const {
|
||||||
@ -1102,7 +1109,7 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
// check the limits
|
// check the limits
|
||||||
if (slot.n_decoded > 2 && slot.has_next_token && !slot.has_budget(params))
|
if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params))
|
||||||
{
|
{
|
||||||
slot.stopped_limit = true;
|
slot.stopped_limit = true;
|
||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
@ -1703,7 +1710,6 @@ struct llama_server_context
|
|||||||
|
|
||||||
llama_batch_add(batch, slot.sampled, system_tokens.size() + slot.n_past, { slot.id }, true);
|
llama_batch_add(batch, slot.sampled, system_tokens.size() + slot.n_past, { slot.id }, true);
|
||||||
|
|
||||||
slot.n_decoded += 1;
|
|
||||||
slot.n_past += 1;
|
slot.n_past += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1921,6 +1927,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
|
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
|
||||||
|
|
||||||
|
slot.n_decoded += 1;
|
||||||
if (slot.n_decoded == 1)
|
if (slot.n_decoded == 1)
|
||||||
{
|
{
|
||||||
slot.t_start_genereration = ggml_time_us();
|
slot.t_start_genereration = ggml_time_us();
|
||||||
|
37
ggml-cuda.cu
37
ggml-cuda.cu
@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
|
|||||||
static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
|
static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
|
||||||
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
|
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
|
||||||
c = __builtin_amdgcn_sdot4(a, b, c, false);
|
c = __builtin_amdgcn_sdot4(a, b, c, false);
|
||||||
#elif defined(__gfx1100__)
|
#elif defined(RDNA3)
|
||||||
c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
|
c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
|
||||||
#elif defined(__gfx1010__) || defined(__gfx900__)
|
#elif defined(__gfx1010__) || defined(__gfx900__)
|
||||||
int tmp1;
|
int tmp1;
|
||||||
@ -1872,14 +1872,6 @@ static __device__ void convert_f16(const void * vx, const int ib, const int iqs,
|
|||||||
v.y = x[ib + iqs + 1];
|
v.y = x[ib + iqs + 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
static __device__ void convert_f32(const void * vx, const int ib, const int iqs, dfloat2 & v){
|
|
||||||
const float * x = (const float *) vx;
|
|
||||||
|
|
||||||
// automatic half -> float type cast if dfloat == float
|
|
||||||
v.x = x[ib + iqs + 0];
|
|
||||||
v.y = x[ib + iqs + 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
static __global__ void quantize_q8_1(const float * __restrict__ x, void * __restrict__ vy, const int kx, const int kx_padded) {
|
static __global__ void quantize_q8_1(const float * __restrict__ x, void * __restrict__ vy, const int kx, const int kx_padded) {
|
||||||
const int ix = blockDim.x*blockIdx.x + threadIdx.x;
|
const int ix = blockDim.x*blockIdx.x + threadIdx.x;
|
||||||
|
|
||||||
@ -1983,7 +1975,7 @@ static __global__ void k_get_rows_float(
|
|||||||
|
|
||||||
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
|
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
|
||||||
static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __restrict__ y, const int k) {
|
static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __restrict__ y, const int k) {
|
||||||
const int i = blockDim.x*blockIdx.x + 2*threadIdx.x;
|
const int i = 2*(blockDim.x*blockIdx.x + threadIdx.x);
|
||||||
|
|
||||||
if (i >= k) {
|
if (i >= k) {
|
||||||
return;
|
return;
|
||||||
@ -2002,6 +1994,19 @@ static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __
|
|||||||
y[iybs + iqs + y_offset] = v.y;
|
y[iybs + iqs + y_offset] = v.y;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename src_t, typename dst_t>
|
||||||
|
static __global__ void convert_unary(const void * __restrict__ vx, dst_t * __restrict__ y, const int k) {
|
||||||
|
const int i = blockDim.x*blockIdx.x + threadIdx.x;
|
||||||
|
|
||||||
|
if (i >= k) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const src_t * x = (src_t *) vx;
|
||||||
|
|
||||||
|
y[i] = x[i];
|
||||||
|
}
|
||||||
|
|
||||||
// VDR = vec dot ratio, how many contiguous integers each thread processes when the vec dot kernel is called
|
// VDR = vec dot ratio, how many contiguous integers each thread processes when the vec dot kernel is called
|
||||||
// MMVQ = mul_mat_vec_q, MMQ = mul_mat_q
|
// MMVQ = mul_mat_vec_q, MMQ = mul_mat_q
|
||||||
|
|
||||||
@ -5609,7 +5614,7 @@ static void quantize_row_q8_1_cuda(const float * x, void * vy, const int kx, con
|
|||||||
|
|
||||||
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
|
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
|
||||||
static void dequantize_block_cuda(const void * __restrict__ vx, dst_t * __restrict__ y, const int k, cudaStream_t stream) {
|
static void dequantize_block_cuda(const void * __restrict__ vx, dst_t * __restrict__ y, const int k, cudaStream_t stream) {
|
||||||
const int num_blocks = (k + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE;
|
const int num_blocks = (k + 2*CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / (2*CUDA_DEQUANTIZE_BLOCK_SIZE);
|
||||||
dequantize_block<qk, qr, dequantize_kernel><<<num_blocks, CUDA_DEQUANTIZE_BLOCK_SIZE, 0, stream>>>(vx, y, k);
|
dequantize_block<qk, qr, dequantize_kernel><<<num_blocks, CUDA_DEQUANTIZE_BLOCK_SIZE, 0, stream>>>(vx, y, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5659,6 +5664,12 @@ static void dequantize_row_q6_K_cuda(const void * vx, dst_t * y, const int k, cu
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename src_t, typename dst_t>
|
||||||
|
static void convert_unary_cuda(const void * __restrict__ vx, dst_t * __restrict__ y, const int k, cudaStream_t stream) {
|
||||||
|
const int num_blocks = (k + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE;
|
||||||
|
convert_unary<src_t><<<num_blocks, CUDA_DEQUANTIZE_BLOCK_SIZE, 0, stream>>>(vx, y, k);
|
||||||
|
}
|
||||||
|
|
||||||
static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
|
static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case GGML_TYPE_Q4_0:
|
case GGML_TYPE_Q4_0:
|
||||||
@ -5682,7 +5693,7 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
|
|||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
return dequantize_row_q6_K_cuda;
|
return dequantize_row_q6_K_cuda;
|
||||||
case GGML_TYPE_F32:
|
case GGML_TYPE_F32:
|
||||||
return dequantize_block_cuda<1, 1, convert_f32>;
|
return convert_unary_cuda<float>;
|
||||||
default:
|
default:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -5711,7 +5722,7 @@ static to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type) {
|
|||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
return dequantize_row_q6_K_cuda;
|
return dequantize_row_q6_K_cuda;
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
return dequantize_block_cuda<1, 1, convert_f16>;
|
return convert_unary_cuda<half>;
|
||||||
default:
|
default:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
16
llama.cpp
16
llama.cpp
@ -2180,7 +2180,11 @@ struct llama_model_loader {
|
|||||||
type_max = type;
|
type_max = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
// LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, name, ggml_type_name(meta->type), llama_format_tensor_shape(meta).c_str());
|
// TODO: make runtime configurable
|
||||||
|
#if 0
|
||||||
|
struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
|
||||||
|
LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (type_max) {
|
switch (type_max) {
|
||||||
@ -4772,7 +4776,6 @@ struct llm_build_context {
|
|||||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||||
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
||||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||||
GGML_ASSERT(n_embd_gqa == n_embd);
|
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
struct ggml_tensor * inpL;
|
struct ggml_tensor * inpL;
|
||||||
@ -4896,7 +4899,6 @@ struct llm_build_context {
|
|||||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||||
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
||||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||||
GGML_ASSERT(n_embd_gqa == n_embd);
|
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
struct ggml_tensor * pos;
|
struct ggml_tensor * pos;
|
||||||
@ -4995,9 +4997,7 @@ struct llm_build_context {
|
|||||||
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
||||||
|
|
||||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||||
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
|
||||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||||
GGML_ASSERT(n_embd_gqa == n_embd);
|
|
||||||
|
|
||||||
const int64_t n_rot = n_embd_head_k / 2;
|
const int64_t n_rot = n_embd_head_k / 2;
|
||||||
|
|
||||||
@ -5209,9 +5209,7 @@ struct llm_build_context {
|
|||||||
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
|
||||||
|
|
||||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||||
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
|
||||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||||
GGML_ASSERT(n_embd_gqa == n_embd);
|
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
struct ggml_tensor * inpL;
|
struct ggml_tensor * inpL;
|
||||||
@ -5304,7 +5302,6 @@ struct llm_build_context {
|
|||||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||||
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
||||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||||
GGML_ASSERT(n_embd_gqa == n_embd);
|
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
struct ggml_tensor * inpL;
|
struct ggml_tensor * inpL;
|
||||||
@ -5400,7 +5397,6 @@ struct llm_build_context {
|
|||||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||||
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
||||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||||
GGML_ASSERT(n_embd_gqa == n_embd);
|
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
struct ggml_tensor * inpL;
|
struct ggml_tensor * inpL;
|
||||||
@ -5727,7 +5723,6 @@ struct llm_build_context {
|
|||||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||||
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
||||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||||
GGML_ASSERT(n_embd_gqa == n_embd);
|
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
struct ggml_tensor * attn_norm_output;
|
struct ggml_tensor * attn_norm_output;
|
||||||
@ -5951,7 +5946,6 @@ struct llm_build_context {
|
|||||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||||
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
|
||||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||||
GGML_ASSERT(n_embd_gqa == n_embd);
|
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
struct ggml_tensor * pos;
|
struct ggml_tensor * pos;
|
||||||
|
Loading…
Reference in New Issue
Block a user