mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 21:10:24 +01:00
swift : fix token_to_piece implementation (#4278)
* Fix token_to_piece implementation in Swift
* Fix errors
This commit is contained in:
parent
511f52c334
commit
b220222a64
@@ -230,18 +230,15 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String
|
|||||||
var result = [CChar](repeating: 0, count: 8)
|
var result = [CChar](repeating: 0, count: 8)
|
||||||
let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count))
|
let nTokens = llama_token_to_piece(model, token, &result, Int32(result.count))
|
||||||
if nTokens < 0 {
|
if nTokens < 0 {
|
||||||
if result.count >= -Int(nTokens) {
|
let actualTokensCount = -Int(nTokens)
|
||||||
result.removeLast(-Int(nTokens))
|
result = .init(repeating: 0, count: actualTokensCount)
|
||||||
} else {
|
|
||||||
result.removeAll()
|
|
||||||
}
|
|
||||||
let check = llama_token_to_piece(
|
let check = llama_token_to_piece(
|
||||||
model,
|
model,
|
||||||
token,
|
token,
|
||||||
&result,
|
&result,
|
||||||
Int32(result.count)
|
Int32(result.count)
|
||||||
)
|
)
|
||||||
assert(check == nTokens)
|
assert(check == actualTokensCount)
|
||||||
} else {
|
} else {
|
||||||
result.removeLast(result.count - Int(nTokens))
|
result.removeLast(result.count - Int(nTokens))
|
||||||
}
|
}
|
||||||
@@ -259,5 +256,4 @@ private func token_to_piece(token: llama_token, buffer: inout [CChar]) -> String
|
|||||||
buffer = []
|
buffer = []
|
||||||
return bufferString
|
return bufferString
|
||||||
}
|
}
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
@@ -164,13 +164,21 @@ actor LlamaContext {
|
|||||||
private func token_to_piece(token: llama_token) -> String {
|
private func token_to_piece(token: llama_token) -> String {
|
||||||
let result = UnsafeMutablePointer<Int8>.allocate(capacity: 8)
|
let result = UnsafeMutablePointer<Int8>.allocate(capacity: 8)
|
||||||
result.initialize(repeating: Int8(0), count: 8)
|
result.initialize(repeating: Int8(0), count: 8)
|
||||||
|
defer {
|
||||||
let _ = llama_token_to_piece(model, token, result, 8)
|
|
||||||
|
|
||||||
let resultStr = String(cString: result)
|
|
||||||
|
|
||||||
result.deallocate()
|
result.deallocate()
|
||||||
|
}
|
||||||
|
let nTokens = llama_token_to_piece(model, token, result, 8)
|
||||||
|
|
||||||
return resultStr
|
if nTokens < 0 {
|
||||||
|
let newResult = UnsafeMutablePointer<Int8>.allocate(capacity: Int(-nTokens))
|
||||||
|
newResult.initialize(repeating: Int8(0), count: Int(-nTokens))
|
||||||
|
defer {
|
||||||
|
newResult.deallocate()
|
||||||
|
}
|
||||||
|
_ = llama_token_to_piece(model, token, newResult, -nTokens)
|
||||||
|
return String(cString: newResult)
|
||||||
|
} else {
|
||||||
|
return String(cString: result)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user