mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 05:48:47 +01:00
llama.swiftui : add bench functionality (#4483)
* llama.swiftui : add bench button * llama.swiftui : initial bench functionality * force to use n_gpu_layers on simulator * add download buttons & expose llamaState.loadModel * update project.pbxproj * comment #Preview & fix editorconfig check * gitignore : xcode stuff * llama.swiftui : UX improvements * llama.swiftui : avoid data copy via "downloadTask" * llama.swiftui : remove model from project * llama : remove "mostly" from model infos * llama.swiftui : improve bench --------- Co-authored-by: jhen <developer@jhen.me>
This commit is contained in:
parent
f7f468a97d
commit
800a489e4a
@ -23,3 +23,6 @@ insert_final_newline = unset
|
|||||||
|
|
||||||
[examples/server/public/*]
|
[examples/server/public/*]
|
||||||
indent_size = 2
|
indent_size = 2
|
||||||
|
|
||||||
|
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
|
||||||
|
indent_style = tab
|
||||||
|
1
examples/llama.swiftui/.gitignore
vendored
1
examples/llama.swiftui/.gitignore
vendored
@ -1 +1,2 @@
|
|||||||
xcuserdata
|
xcuserdata
|
||||||
|
xcshareddata
|
||||||
|
@ -6,16 +6,34 @@ enum LlamaError: Error {
|
|||||||
case couldNotInitializeContext
|
case couldNotInitializeContext
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func llama_batch_clear(_ batch: inout llama_batch) {
|
||||||
|
batch.n_tokens = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func llama_batch_add(_ batch: inout llama_batch, _ id: llama_token, _ pos: llama_pos, _ seq_ids: [llama_seq_id], _ logits: Bool) {
|
||||||
|
batch.token [Int(batch.n_tokens)] = id
|
||||||
|
batch.pos [Int(batch.n_tokens)] = pos
|
||||||
|
batch.n_seq_id[Int(batch.n_tokens)] = Int32(seq_ids.count)
|
||||||
|
for i in 0..<seq_ids.count {
|
||||||
|
batch.seq_id[Int(batch.n_tokens)]![Int(i)] = seq_ids[i]
|
||||||
|
}
|
||||||
|
batch.logits [Int(batch.n_tokens)] = logits ? 1 : 0
|
||||||
|
|
||||||
|
batch.n_tokens += 1
|
||||||
|
}
|
||||||
|
|
||||||
actor LlamaContext {
|
actor LlamaContext {
|
||||||
private var model: OpaquePointer
|
private var model: OpaquePointer
|
||||||
private var context: OpaquePointer
|
private var context: OpaquePointer
|
||||||
private var batch: llama_batch
|
private var batch: llama_batch
|
||||||
private var tokens_list: [llama_token]
|
private var tokens_list: [llama_token]
|
||||||
|
|
||||||
/// This variable is used to store temporarily invalid cchars
|
/// This variable is used to store temporarily invalid cchars
|
||||||
private var temporary_invalid_cchars: [CChar]
|
private var temporary_invalid_cchars: [CChar]
|
||||||
|
|
||||||
var n_len: Int32 = 512
|
var n_len: Int32 = 64
|
||||||
var n_cur: Int32 = 0
|
var n_cur: Int32 = 0
|
||||||
|
|
||||||
var n_decode: Int32 = 0
|
var n_decode: Int32 = 0
|
||||||
|
|
||||||
init(model: OpaquePointer, context: OpaquePointer) {
|
init(model: OpaquePointer, context: OpaquePointer) {
|
||||||
@ -27,25 +45,34 @@ actor LlamaContext {
|
|||||||
}
|
}
|
||||||
|
|
||||||
deinit {
|
deinit {
|
||||||
|
llama_batch_free(batch)
|
||||||
llama_free(context)
|
llama_free(context)
|
||||||
llama_free_model(model)
|
llama_free_model(model)
|
||||||
llama_backend_free()
|
llama_backend_free()
|
||||||
}
|
}
|
||||||
|
|
||||||
static func createContext(path: String) throws -> LlamaContext {
|
static func create_context(path: String) throws -> LlamaContext {
|
||||||
llama_backend_init(false)
|
llama_backend_init(false)
|
||||||
let model_params = llama_model_default_params()
|
var model_params = llama_model_default_params()
|
||||||
|
|
||||||
|
#if targetEnvironment(simulator)
|
||||||
|
model_params.n_gpu_layers = 0
|
||||||
|
print("Running on simulator, force use n_gpu_layers = 0")
|
||||||
|
#endif
|
||||||
let model = llama_load_model_from_file(path, model_params)
|
let model = llama_load_model_from_file(path, model_params)
|
||||||
guard let model else {
|
guard let model else {
|
||||||
print("Could not load model at \(path)")
|
print("Could not load model at \(path)")
|
||||||
throw LlamaError.couldNotInitializeContext
|
throw LlamaError.couldNotInitializeContext
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let n_threads = max(1, min(8, ProcessInfo.processInfo.processorCount - 2))
|
||||||
|
print("Using \(n_threads) threads")
|
||||||
|
|
||||||
var ctx_params = llama_context_default_params()
|
var ctx_params = llama_context_default_params()
|
||||||
ctx_params.seed = 1234
|
ctx_params.seed = 1234
|
||||||
ctx_params.n_ctx = 2048
|
ctx_params.n_ctx = 2048
|
||||||
ctx_params.n_threads = 8
|
ctx_params.n_threads = UInt32(n_threads)
|
||||||
ctx_params.n_threads_batch = 8
|
ctx_params.n_threads_batch = UInt32(n_threads)
|
||||||
|
|
||||||
let context = llama_new_context_with_model(model, ctx_params)
|
let context = llama_new_context_with_model(model, ctx_params)
|
||||||
guard let context else {
|
guard let context else {
|
||||||
@ -56,6 +83,26 @@ actor LlamaContext {
|
|||||||
return LlamaContext(model: model, context: context)
|
return LlamaContext(model: model, context: context)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func model_info() -> String {
|
||||||
|
let result = UnsafeMutablePointer<Int8>.allocate(capacity: 256)
|
||||||
|
result.initialize(repeating: Int8(0), count: 256)
|
||||||
|
defer {
|
||||||
|
result.deallocate()
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: this is probably very stupid way to get the string from C
|
||||||
|
|
||||||
|
let nChars = llama_model_desc(model, result, 256)
|
||||||
|
let bufferPointer = UnsafeBufferPointer(start: result, count: Int(nChars))
|
||||||
|
|
||||||
|
var SwiftString = ""
|
||||||
|
for char in bufferPointer {
|
||||||
|
SwiftString.append(Character(UnicodeScalar(UInt8(char))))
|
||||||
|
}
|
||||||
|
|
||||||
|
return SwiftString
|
||||||
|
}
|
||||||
|
|
||||||
func get_n_tokens() -> Int32 {
|
func get_n_tokens() -> Int32 {
|
||||||
return batch.n_tokens;
|
return batch.n_tokens;
|
||||||
}
|
}
|
||||||
@ -79,16 +126,11 @@ actor LlamaContext {
|
|||||||
print(String(cString: token_to_piece(token: id) + [0]))
|
print(String(cString: token_to_piece(token: id) + [0]))
|
||||||
}
|
}
|
||||||
|
|
||||||
// batch = llama_batch_init(512, 0) // done in init()
|
llama_batch_clear(&batch)
|
||||||
batch.n_tokens = Int32(tokens_list.count)
|
|
||||||
|
|
||||||
for i1 in 0..<batch.n_tokens {
|
for i1 in 0..<tokens_list.count {
|
||||||
let i = Int(i1)
|
let i = Int(i1)
|
||||||
batch.token[i] = tokens_list[i]
|
llama_batch_add(&batch, tokens_list[i], Int32(i), [0], false)
|
||||||
batch.pos[i] = i1
|
|
||||||
batch.n_seq_id[Int(i)] = 1
|
|
||||||
batch.seq_id[Int(i)]![0] = 0
|
|
||||||
batch.logits[i] = 0
|
|
||||||
}
|
}
|
||||||
batch.logits[Int(batch.n_tokens) - 1] = 1 // true
|
batch.logits[Int(batch.n_tokens) - 1] = 1 // true
|
||||||
|
|
||||||
@ -141,17 +183,10 @@ actor LlamaContext {
|
|||||||
print(new_token_str)
|
print(new_token_str)
|
||||||
// tokens_list.append(new_token_id)
|
// tokens_list.append(new_token_id)
|
||||||
|
|
||||||
batch.n_tokens = 0
|
llama_batch_clear(&batch)
|
||||||
|
llama_batch_add(&batch, new_token_id, n_cur, [0], true)
|
||||||
batch.token[Int(batch.n_tokens)] = new_token_id
|
|
||||||
batch.pos[Int(batch.n_tokens)] = n_cur
|
|
||||||
batch.n_seq_id[Int(batch.n_tokens)] = 1
|
|
||||||
batch.seq_id[Int(batch.n_tokens)]![0] = 0
|
|
||||||
batch.logits[Int(batch.n_tokens)] = 1 // true
|
|
||||||
batch.n_tokens += 1
|
|
||||||
|
|
||||||
n_decode += 1
|
n_decode += 1
|
||||||
|
|
||||||
n_cur += 1
|
n_cur += 1
|
||||||
|
|
||||||
if llama_decode(context, batch) != 0 {
|
if llama_decode(context, batch) != 0 {
|
||||||
@ -161,14 +196,111 @@ actor LlamaContext {
|
|||||||
return new_token_str
|
return new_token_str
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func bench(pp: Int, tg: Int, pl: Int, nr: Int = 1) -> String {
|
||||||
|
var pp_avg: Double = 0
|
||||||
|
var tg_avg: Double = 0
|
||||||
|
|
||||||
|
var pp_std: Double = 0
|
||||||
|
var tg_std: Double = 0
|
||||||
|
|
||||||
|
for r in 0..<nr {
|
||||||
|
// bench prompt processing
|
||||||
|
|
||||||
|
llama_batch_clear(&batch)
|
||||||
|
|
||||||
|
let n_tokens = pp
|
||||||
|
|
||||||
|
for i in 0..<n_tokens {
|
||||||
|
llama_batch_add(&batch, 0, Int32(i), [0], false)
|
||||||
|
}
|
||||||
|
batch.logits[Int(batch.n_tokens) - 1] = 1 // true
|
||||||
|
|
||||||
|
llama_kv_cache_clear(context)
|
||||||
|
|
||||||
|
let t_pp_start = ggml_time_us()
|
||||||
|
|
||||||
|
if llama_decode(context, batch) != 0 {
|
||||||
|
print("llama_decode() failed during prompt")
|
||||||
|
}
|
||||||
|
|
||||||
|
let t_pp_end = ggml_time_us()
|
||||||
|
|
||||||
|
// bench text generation
|
||||||
|
|
||||||
|
llama_kv_cache_clear(context)
|
||||||
|
|
||||||
|
let t_tg_start = ggml_time_us()
|
||||||
|
|
||||||
|
for i in 0..<tg {
|
||||||
|
llama_batch_clear(&batch)
|
||||||
|
|
||||||
|
for j in 0..<pl {
|
||||||
|
llama_batch_add(&batch, 0, Int32(i), [Int32(j)], true)
|
||||||
|
}
|
||||||
|
|
||||||
|
if llama_decode(context, batch) != 0 {
|
||||||
|
print("llama_decode() failed during text generation")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let t_tg_end = ggml_time_us()
|
||||||
|
|
||||||
|
llama_kv_cache_clear(context)
|
||||||
|
|
||||||
|
let t_pp = Double(t_pp_end - t_pp_start) / 1000000.0
|
||||||
|
let t_tg = Double(t_tg_end - t_tg_start) / 1000000.0
|
||||||
|
|
||||||
|
let speed_pp = Double(pp) / t_pp
|
||||||
|
let speed_tg = Double(pl*tg) / t_tg
|
||||||
|
|
||||||
|
pp_avg += speed_pp
|
||||||
|
tg_avg += speed_tg
|
||||||
|
|
||||||
|
pp_std += speed_pp * speed_pp
|
||||||
|
tg_std += speed_tg * speed_tg
|
||||||
|
|
||||||
|
print("pp \(speed_pp) t/s, tg \(speed_tg) t/s")
|
||||||
|
}
|
||||||
|
|
||||||
|
pp_avg /= Double(nr)
|
||||||
|
tg_avg /= Double(nr)
|
||||||
|
|
||||||
|
if nr > 1 {
|
||||||
|
pp_std = sqrt(pp_std / Double(nr - 1) - pp_avg * pp_avg * Double(nr) / Double(nr - 1))
|
||||||
|
tg_std = sqrt(tg_std / Double(nr - 1) - tg_avg * tg_avg * Double(nr) / Double(nr - 1))
|
||||||
|
} else {
|
||||||
|
pp_std = 0
|
||||||
|
tg_std = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
let model_desc = model_info();
|
||||||
|
let model_size = String(format: "%.2f GiB", Double(llama_model_size(model)) / 1024.0 / 1024.0 / 1024.0);
|
||||||
|
let model_n_params = String(format: "%.2f B", Double(llama_model_n_params(model)) / 1e9);
|
||||||
|
let backend = "Metal";
|
||||||
|
let pp_avg_str = String(format: "%.2f", pp_avg);
|
||||||
|
let tg_avg_str = String(format: "%.2f", tg_avg);
|
||||||
|
let pp_std_str = String(format: "%.2f", pp_std);
|
||||||
|
let tg_std_str = String(format: "%.2f", tg_std);
|
||||||
|
|
||||||
|
var result = ""
|
||||||
|
|
||||||
|
result += String("| model | size | params | backend | test | t/s |\n")
|
||||||
|
result += String("| --- | --- | --- | --- | --- | --- |\n")
|
||||||
|
result += String("| \(model_desc) | \(model_size) | \(model_n_params) | \(backend) | pp \(pp) | \(pp_avg_str) ± \(pp_std_str) |\n")
|
||||||
|
result += String("| \(model_desc) | \(model_size) | \(model_n_params) | \(backend) | tg \(tg) | \(tg_avg_str) ± \(tg_std_str) |\n")
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
func clear() {
|
func clear() {
|
||||||
tokens_list.removeAll()
|
tokens_list.removeAll()
|
||||||
temporary_invalid_cchars.removeAll()
|
temporary_invalid_cchars.removeAll()
|
||||||
|
llama_kv_cache_clear(context)
|
||||||
}
|
}
|
||||||
|
|
||||||
private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
|
private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
|
||||||
let utf8Count = text.utf8.count
|
let utf8Count = text.utf8.count
|
||||||
let n_tokens = utf8Count + (add_bos ? 1 : 0)
|
let n_tokens = utf8Count + (add_bos ? 1 : 0) + 1
|
||||||
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
|
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
|
||||||
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)
|
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)
|
||||||
|
|
||||||
|
@ -7,14 +7,15 @@
|
|||||||
objects = {
|
objects = {
|
||||||
|
|
||||||
/* Begin PBXBuildFile section */
|
/* Begin PBXBuildFile section */
|
||||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; };
|
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; };
|
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
|
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
|
||||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
|
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
|
||||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; };
|
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
|
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
|
||||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; };
|
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; };
|
||||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 549479C52AC9E0F200E0F78B /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc -DGGML_SWIFT -DGGML_USE_METAL -O3"; }; };
|
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 549479C52AC9E0F200E0F78B /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc -DGGML_SWIFT -DGGML_USE_METAL -O3"; }; };
|
||||||
|
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */; };
|
||||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */; };
|
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */; };
|
||||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83782AC328BD0096AF73 /* ContentView.swift */; };
|
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83782AC328BD0096AF73 /* ContentView.swift */; };
|
||||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837A2AC328BE0096AF73 /* Assets.xcassets */; };
|
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837A2AC328BE0096AF73 /* Assets.xcassets */; };
|
||||||
@ -40,6 +41,7 @@
|
|||||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../ggml-metal.h"; sourceTree = "<group>"; };
|
549479C62AC9E0F200E0F78B /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../ggml-metal.h"; sourceTree = "<group>"; };
|
||||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../ggml-metal.metal"; sourceTree = "<group>"; };
|
549479C82AC9E10B00E0F78B /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../ggml-metal.metal"; sourceTree = "<group>"; };
|
||||||
549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
|
549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
|
||||||
|
7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DownloadButton.swift; sourceTree = "<group>"; };
|
||||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "bridging-header.h"; sourceTree = "<group>"; };
|
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "bridging-header.h"; sourceTree = "<group>"; };
|
||||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llama.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llama.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = llama_swiftuiApp.swift; sourceTree = "<group>"; };
|
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = llama_swiftuiApp.swift; sourceTree = "<group>"; };
|
||||||
@ -47,7 +49,6 @@
|
|||||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
||||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
||||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
|
8A39BE092AC7601000BFEB40 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
|
||||||
8A3F841F2AC4C824005E2EE8 /* llama-2-7b-chat.Q2_K.gguf */ = {isa = PBXFileReference; lastKnownFileType = file; path = "llama-2-7b-chat.Q2_K.gguf"; sourceTree = "<group>"; };
|
|
||||||
8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
|
8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
|
||||||
8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
|
8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
|
||||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
|
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
|
||||||
@ -147,7 +148,6 @@
|
|||||||
8A3F84112AC4BD8C005E2EE8 /* models */ = {
|
8A3F84112AC4BD8C005E2EE8 /* models */ = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
8A3F841F2AC4C824005E2EE8 /* llama-2-7b-chat.Q2_K.gguf */,
|
|
||||||
);
|
);
|
||||||
path = models;
|
path = models;
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
@ -164,6 +164,7 @@
|
|||||||
8A9F7C4A2AC332BF008AE1EA /* UI */ = {
|
8A9F7C4A2AC332BF008AE1EA /* UI */ = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
|
7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */,
|
||||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */,
|
8A1C83782AC328BD0096AF73 /* ContentView.swift */,
|
||||||
);
|
);
|
||||||
path = UI;
|
path = UI;
|
||||||
@ -262,6 +263,7 @@
|
|||||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */,
|
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */,
|
||||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */,
|
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */,
|
||||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */,
|
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */,
|
||||||
|
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */,
|
||||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */,
|
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */,
|
||||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */,
|
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */,
|
||||||
);
|
);
|
||||||
|
@ -3,24 +3,26 @@ import Foundation
|
|||||||
@MainActor
|
@MainActor
|
||||||
class LlamaState: ObservableObject {
|
class LlamaState: ObservableObject {
|
||||||
@Published var messageLog = ""
|
@Published var messageLog = ""
|
||||||
|
@Published var cacheCleared = false
|
||||||
|
|
||||||
private var llamaContext: LlamaContext?
|
private var llamaContext: LlamaContext?
|
||||||
private var modelUrl: URL? {
|
private var defaultModelUrl: URL? {
|
||||||
Bundle.main.url(forResource: "q8_0", withExtension: "gguf", subdirectory: "models")
|
Bundle.main.url(forResource: "ggml-model", withExtension: "gguf", subdirectory: "models")
|
||||||
// Bundle.main.url(forResource: "llama-2-7b-chat", withExtension: "Q2_K.gguf", subdirectory: "models")
|
// Bundle.main.url(forResource: "llama-2-7b-chat", withExtension: "Q2_K.gguf", subdirectory: "models")
|
||||||
}
|
}
|
||||||
|
|
||||||
init() {
|
init() {
|
||||||
do {
|
do {
|
||||||
try loadModel()
|
try loadModel(modelUrl: defaultModelUrl)
|
||||||
} catch {
|
} catch {
|
||||||
messageLog += "Error!\n"
|
messageLog += "Error!\n"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private func loadModel() throws {
|
func loadModel(modelUrl: URL?) throws {
|
||||||
messageLog += "Loading model...\n"
|
messageLog += "Loading model...\n"
|
||||||
if let modelUrl {
|
if let modelUrl {
|
||||||
llamaContext = try LlamaContext.createContext(path: modelUrl.path())
|
llamaContext = try LlamaContext.create_context(path: modelUrl.path())
|
||||||
messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
|
messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
|
||||||
} else {
|
} else {
|
||||||
messageLog += "Could not locate model\n"
|
messageLog += "Could not locate model\n"
|
||||||
@ -31,7 +33,7 @@ class LlamaState: ObservableObject {
|
|||||||
guard let llamaContext else {
|
guard let llamaContext else {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
messageLog += "Attempting to complete text...\n"
|
|
||||||
await llamaContext.completion_init(text: text)
|
await llamaContext.completion_init(text: text)
|
||||||
messageLog += "\(text)"
|
messageLog += "\(text)"
|
||||||
|
|
||||||
@ -42,4 +44,42 @@ class LlamaState: ObservableObject {
|
|||||||
await llamaContext.clear()
|
await llamaContext.clear()
|
||||||
messageLog += "\n\ndone\n"
|
messageLog += "\n\ndone\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func bench() async {
|
||||||
|
guard let llamaContext else {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
messageLog += "\n"
|
||||||
|
messageLog += "Running benchmark...\n"
|
||||||
|
messageLog += "Model info: "
|
||||||
|
messageLog += await llamaContext.model_info() + "\n"
|
||||||
|
|
||||||
|
let t_start = DispatchTime.now().uptimeNanoseconds
|
||||||
|
await llamaContext.bench(pp: 8, tg: 4, pl: 1) // heat up
|
||||||
|
let t_end = DispatchTime.now().uptimeNanoseconds
|
||||||
|
|
||||||
|
let t_heat = Double(t_end - t_start) / 1_000_000_000.0
|
||||||
|
messageLog += "Heat up time: \(t_heat) seconds, please wait...\n"
|
||||||
|
|
||||||
|
// if more than 5 seconds, then we're probably running on a slow device
|
||||||
|
if t_heat > 5.0 {
|
||||||
|
messageLog += "Heat up time is too long, aborting benchmark\n"
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
let result = await llamaContext.bench(pp: 512, tg: 128, pl: 1, nr: 3)
|
||||||
|
|
||||||
|
messageLog += "\(result)"
|
||||||
|
messageLog += "\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
func clear() async {
|
||||||
|
guard let llamaContext else {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
await llamaContext.clear()
|
||||||
|
messageLog = ""
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,24 +5,97 @@ struct ContentView: View {
|
|||||||
|
|
||||||
@State private var multiLineText = ""
|
@State private var multiLineText = ""
|
||||||
|
|
||||||
|
private static func cleanupModelCaches() {
|
||||||
|
// Delete all models (*.gguf)
|
||||||
|
let fileManager = FileManager.default
|
||||||
|
let documentsUrl = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
|
||||||
|
do {
|
||||||
|
let fileURLs = try fileManager.contentsOfDirectory(at: documentsUrl, includingPropertiesForKeys: nil)
|
||||||
|
for fileURL in fileURLs {
|
||||||
|
if fileURL.pathExtension == "gguf" {
|
||||||
|
try fileManager.removeItem(at: fileURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
print("Error while enumerating files \(documentsUrl.path): \(error.localizedDescription)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
VStack {
|
VStack {
|
||||||
ScrollView(.vertical) {
|
ScrollView(.vertical, showsIndicators: true) {
|
||||||
Text(llamaState.messageLog)
|
Text(llamaState.messageLog)
|
||||||
|
.font(.system(size: 12))
|
||||||
|
.frame(maxWidth: .infinity, alignment: .leading)
|
||||||
|
.padding()
|
||||||
|
.onTapGesture {
|
||||||
|
UIApplication.shared.sendAction(#selector(UIResponder.resignFirstResponder), to: nil, from: nil, for: nil)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TextEditor(text: $multiLineText)
|
TextEditor(text: $multiLineText)
|
||||||
.frame(height: 200)
|
.frame(height: 80)
|
||||||
.padding()
|
.padding()
|
||||||
.border(Color.gray, width: 0.5)
|
.border(Color.gray, width: 0.5)
|
||||||
Button(action: {
|
|
||||||
|
HStack {
|
||||||
|
Button("Send") {
|
||||||
sendText()
|
sendText()
|
||||||
}) {
|
}
|
||||||
Text("Send")
|
.padding(8)
|
||||||
.padding()
|
|
||||||
.background(Color.blue)
|
.background(Color.blue)
|
||||||
.foregroundColor(.white)
|
.foregroundColor(.white)
|
||||||
.cornerRadius(8)
|
.cornerRadius(8)
|
||||||
|
|
||||||
|
Button("Bench") {
|
||||||
|
bench()
|
||||||
|
}
|
||||||
|
.padding(8)
|
||||||
|
.background(Color.blue)
|
||||||
|
.foregroundColor(.white)
|
||||||
|
.cornerRadius(8)
|
||||||
|
|
||||||
|
Button("Clear") {
|
||||||
|
clear()
|
||||||
|
}
|
||||||
|
.padding(8)
|
||||||
|
.background(Color.blue)
|
||||||
|
.foregroundColor(.white)
|
||||||
|
.cornerRadius(8)
|
||||||
|
|
||||||
|
Button("Copy") {
|
||||||
|
UIPasteboard.general.string = llamaState.messageLog
|
||||||
|
}
|
||||||
|
.padding(8)
|
||||||
|
.background(Color.blue)
|
||||||
|
.foregroundColor(.white)
|
||||||
|
.cornerRadius(8)
|
||||||
|
}
|
||||||
|
|
||||||
|
VStack {
|
||||||
|
DownloadButton(
|
||||||
|
llamaState: llamaState,
|
||||||
|
modelName: "TinyLlama-1.1B (Q4_0)",
|
||||||
|
modelUrl: "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true",
|
||||||
|
filename: "tinyllama-1.1b-1t-openorca.Q4_0.gguf"
|
||||||
|
)
|
||||||
|
.font(.system(size: 12))
|
||||||
|
.padding(.top, 4)
|
||||||
|
|
||||||
|
DownloadButton(
|
||||||
|
llamaState: llamaState,
|
||||||
|
modelName: "TinyLlama-1.1B (Q8_0)",
|
||||||
|
modelUrl: "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q8_0.gguf?download=true",
|
||||||
|
filename: "tinyllama-1.1b-1t-openorca.Q8_0.gguf"
|
||||||
|
)
|
||||||
|
.font(.system(size: 12))
|
||||||
|
|
||||||
|
Button("Clear downloaded models") {
|
||||||
|
ContentView.cleanupModelCaches()
|
||||||
|
llamaState.cacheCleared = true
|
||||||
|
}
|
||||||
|
.padding(8)
|
||||||
|
.font(.system(size: 12))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
.padding()
|
.padding()
|
||||||
@ -34,9 +107,20 @@ struct ContentView: View {
|
|||||||
multiLineText = ""
|
multiLineText = ""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func bench() {
|
||||||
|
Task {
|
||||||
|
await llamaState.bench()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func clear() {
|
||||||
|
Task {
|
||||||
|
await llamaState.clear()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
#Preview {
|
//#Preview {
|
||||||
ContentView()
|
// ContentView()
|
||||||
}
|
//}
|
||||||
*/
|
|
||||||
|
122
examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
Normal file
122
examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
import SwiftUI
|
||||||
|
|
||||||
|
struct DownloadButton: View {
|
||||||
|
@ObservedObject private var llamaState: LlamaState
|
||||||
|
private var modelName: String
|
||||||
|
private var modelUrl: String
|
||||||
|
private var filename: String
|
||||||
|
|
||||||
|
@State private var status: String
|
||||||
|
|
||||||
|
@State private var downloadTask: URLSessionDownloadTask?
|
||||||
|
@State private var progress = 0.0
|
||||||
|
@State private var observation: NSKeyValueObservation?
|
||||||
|
|
||||||
|
private static func getFileURL(filename: String) -> URL {
|
||||||
|
FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent(filename)
|
||||||
|
}
|
||||||
|
|
||||||
|
private func checkFileExistenceAndUpdateStatus() {
|
||||||
|
}
|
||||||
|
|
||||||
|
init(llamaState: LlamaState, modelName: String, modelUrl: String, filename: String) {
|
||||||
|
self.llamaState = llamaState
|
||||||
|
self.modelName = modelName
|
||||||
|
self.modelUrl = modelUrl
|
||||||
|
self.filename = filename
|
||||||
|
|
||||||
|
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||||
|
status = FileManager.default.fileExists(atPath: fileURL.path) ? "downloaded" : "download"
|
||||||
|
}
|
||||||
|
|
||||||
|
private func download() {
|
||||||
|
status = "downloading"
|
||||||
|
print("Downloading model \(modelName) from \(modelUrl)")
|
||||||
|
guard let url = URL(string: modelUrl) else { return }
|
||||||
|
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||||
|
|
||||||
|
downloadTask = URLSession.shared.downloadTask(with: url) { temporaryURL, response, error in
|
||||||
|
if let error = error {
|
||||||
|
print("Error: \(error.localizedDescription)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
guard let response = response as? HTTPURLResponse, (200...299).contains(response.statusCode) else {
|
||||||
|
print("Server error!")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
if let temporaryURL = temporaryURL {
|
||||||
|
try FileManager.default.copyItem(at: temporaryURL, to: fileURL)
|
||||||
|
print("Writing to \(filename) completed")
|
||||||
|
|
||||||
|
llamaState.cacheCleared = false
|
||||||
|
|
||||||
|
status = "downloaded"
|
||||||
|
}
|
||||||
|
} catch let err {
|
||||||
|
print("Error: \(err.localizedDescription)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
observation = downloadTask?.progress.observe(\.fractionCompleted) { progress, _ in
|
||||||
|
self.progress = progress.fractionCompleted
|
||||||
|
}
|
||||||
|
|
||||||
|
downloadTask?.resume()
|
||||||
|
}
|
||||||
|
|
||||||
|
var body: some View {
|
||||||
|
VStack {
|
||||||
|
if status == "download" {
|
||||||
|
Button(action: download) {
|
||||||
|
Text("Download " + modelName)
|
||||||
|
}
|
||||||
|
} else if status == "downloading" {
|
||||||
|
Button(action: {
|
||||||
|
downloadTask?.cancel()
|
||||||
|
status = "download"
|
||||||
|
}) {
|
||||||
|
Text("\(modelName) (Downloading \(Int(progress * 100))%)")
|
||||||
|
}
|
||||||
|
} else if status == "downloaded" {
|
||||||
|
Button(action: {
|
||||||
|
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||||
|
if !FileManager.default.fileExists(atPath: fileURL.path) {
|
||||||
|
download()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
do {
|
||||||
|
try llamaState.loadModel(modelUrl: fileURL)
|
||||||
|
} catch let err {
|
||||||
|
print("Error: \(err.localizedDescription)")
|
||||||
|
}
|
||||||
|
}) {
|
||||||
|
Text("\(modelName) (Downloaded)")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Text("Unknown status")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.onDisappear() {
|
||||||
|
downloadTask?.cancel()
|
||||||
|
}
|
||||||
|
.onChange(of: llamaState.cacheCleared) { newValue in
|
||||||
|
if newValue {
|
||||||
|
downloadTask?.cancel()
|
||||||
|
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||||
|
status = FileManager.default.fileExists(atPath: fileURL.path) ? "downloaded" : "download"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// #Preview {
|
||||||
|
// DownloadButton(
|
||||||
|
// llamaState: LlamaState(),
|
||||||
|
// modelName: "TheBloke / TinyLlama-1.1B-1T-OpenOrca-GGUF (Q4_0)",
|
||||||
|
// modelUrl: "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true",
|
||||||
|
// filename: "tinyllama-1.1b-1t-openorca.Q4_0.gguf"
|
||||||
|
// )
|
||||||
|
// }
|
33
llama.cpp
33
llama.cpp
@ -2397,25 +2397,25 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
|
|||||||
|
|
||||||
switch (ftype) {
|
switch (ftype) {
|
||||||
case LLAMA_FTYPE_ALL_F32: return "all F32";
|
case LLAMA_FTYPE_ALL_F32: return "all F32";
|
||||||
case LLAMA_FTYPE_MOSTLY_F16: return "mostly F16";
|
case LLAMA_FTYPE_MOSTLY_F16: return "F16";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q4_0: return "mostly Q4_0";
|
case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q4_1: return "mostly Q4_1";
|
case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
|
case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
|
||||||
return "mostly Q4_1, some F16";
|
return "Q4_1, some F16";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q5_0: return "mostly Q5_0";
|
case LLAMA_FTYPE_MOSTLY_Q5_0: return "Q5_0";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q5_1: return "mostly Q5_1";
|
case LLAMA_FTYPE_MOSTLY_Q5_1: return "Q5_1";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q8_0: return "mostly Q8_0";
|
case LLAMA_FTYPE_MOSTLY_Q8_0: return "Q8_0";
|
||||||
|
|
||||||
// K-quants
|
// K-quants
|
||||||
case LLAMA_FTYPE_MOSTLY_Q2_K: return "mostly Q2_K";
|
case LLAMA_FTYPE_MOSTLY_Q2_K: return "Q2_K";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q3_K_S: return "mostly Q3_K - Small";
|
case LLAMA_FTYPE_MOSTLY_Q3_K_S: return "Q3_K - Small";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q3_K_M: return "mostly Q3_K - Medium";
|
case LLAMA_FTYPE_MOSTLY_Q3_K_M: return "Q3_K - Medium";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q3_K_L: return "mostly Q3_K - Large";
|
case LLAMA_FTYPE_MOSTLY_Q3_K_L: return "Q3_K - Large";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q4_K_S: return "mostly Q4_K - Small";
|
case LLAMA_FTYPE_MOSTLY_Q4_K_S: return "Q4_K - Small";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q4_K_M: return "mostly Q4_K - Medium";
|
case LLAMA_FTYPE_MOSTLY_Q4_K_M: return "Q4_K - Medium";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q5_K_S: return "mostly Q5_K - Small";
|
case LLAMA_FTYPE_MOSTLY_Q5_K_S: return "Q5_K - Small";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q5_K_M: return "mostly Q5_K - Medium";
|
case LLAMA_FTYPE_MOSTLY_Q5_K_M: return "Q5_K - Medium";
|
||||||
case LLAMA_FTYPE_MOSTLY_Q6_K: return "mostly Q6_K";
|
case LLAMA_FTYPE_MOSTLY_Q6_K: return "Q6_K";
|
||||||
|
|
||||||
default: return "unknown, may not work";
|
default: return "unknown, may not work";
|
||||||
}
|
}
|
||||||
@ -2533,6 +2533,7 @@ static void llm_load_hparams(
|
|||||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||||
|
|
||||||
switch (hparams.n_layer) {
|
switch (hparams.n_layer) {
|
||||||
|
case 22: model.type = e_model::MODEL_1B; break;
|
||||||
case 26: model.type = e_model::MODEL_3B; break;
|
case 26: model.type = e_model::MODEL_3B; break;
|
||||||
case 32: model.type = e_model::MODEL_7B; break;
|
case 32: model.type = e_model::MODEL_7B; break;
|
||||||
case 40: model.type = e_model::MODEL_13B; break;
|
case 40: model.type = e_model::MODEL_13B; break;
|
||||||
|
Loading…
Reference in New Issue
Block a user