Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2025-01-07 11:23:56 +01:00
convert : rm quantization version
parent 1d60468eee
commit bf2dad3100
@@ -23,6 +23,7 @@ def permute(weights: NDArray, n_head: int) -> NDArray:
             .swapaxes(1, 2)
             .reshape(weights.shape))
 
+
 def count_model_parts(dir_model: str) -> int:
     num_parts = 0
     for filename in os.listdir(dir_model):
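For context, the hunk above shows only the tail of the script's `permute` helper, which reorders HuggingFace attention weights into the interleaved row layout GGML expects for rotary embeddings. A minimal sketch of the whole function; the `reshape` on its first line is reconstructed, not shown in this diff:

    from numpy.typing import NDArray

    def permute(weights: NDArray, n_head: int) -> NDArray:
        # Split each head's rows into two halves, then interleave them
        # (a0, b0, a1, b1, ...) to match GGML's rotary-embedding layout.
        # The reshape arguments here are an assumption; only the two
        # trailing method calls appear in the hunk.
        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                .swapaxes(1, 2)
                .reshape(weights.shape))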
@@ -33,6 +34,7 @@ def count_model_parts(dir_model: str) -> int:
         print("gguf: found " + str(num_parts) + " model parts")
     return num_parts
 
+
 if len(sys.argv) < 3:
     print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
     print(" ftype == 0 -> float32")
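Pieced together from the context lines of the two hunks above, the part-counting helper plausibly reads as follows; the `pytorch_model-` filename filter and the `num_parts > 0` guard are assumptions, since the diff shows neither:

    import os

    def count_model_parts(dir_model: str) -> int:
        # Count sharded checkpoint files in the model directory.
        num_parts = 0
        for filename in os.listdir(dir_model):
            if filename.startswith("pytorch_model-"):  # assumed shard prefix
                num_parts += 1

        if num_parts > 0:  # assumed guard around the print shown above
            print("gguf: found " + str(num_parts) + " model parts")
        return num_parts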
@@ -86,7 +88,6 @@ block_count = hparams["num_hidden_layers"]
 
 gguf_writer.add_name(last_dir)
 gguf_writer.add_architecture(llm_arch)
-gguf_writer.add_quantization_version(ftype)
 guff_writer.add_source_hf_repo(hf_repo)
 gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])
 gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])
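The removed call is the whole commit: `ftype` is the output tensor format chosen on the command line (0 = float32, per the usage text above), not a quantization format version, so writing it under the quantization-version key recorded a meaningless value for an unquantized HF conversion. The `guff_writer` spelling on the fourth context line is carried over verbatim from the script. For comparison, a sketch of how the two fields are kept separate in today's gguf-py package, which is an assumption here since that package postdates this commit; all values are illustrative:

    import gguf

    # Hypothetical output path and hyperparameters, for illustration only.
    writer = gguf.GGUFWriter("model.gguf", arch="llama")
    writer.add_name("example-model")
    writer.add_context_length(2048)
    writer.add_embedding_length(4096)
    writer.add_file_type(1)  # file type: 1 == mostly float16
    # The quantization version only matters once tensors are actually
    # quantized; an unquantized conversion can simply omit it:
    # writer.add_quantization_version(gguf.GGML_QUANT_VERSION)

    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()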