fix conflicts

M. Yusuf Sarıgöz 2023-08-13 13:35:40 +03:00
commit 1d60468eee
2 changed files with 220 additions and 86 deletions

View File

@@ -1,4 +1,4 @@
-# Quick and dirty HF gptneox--> gguf conversion
+# HF gptneox--> gguf conversion
 import gguf
 import gguf_tensor_map as tmap
@@ -9,7 +9,8 @@ import json
 import numpy as np
 from typing import Any, List
 from pathlib import Path
-from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+from transformers import AutoTokenizer
 
 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
 def bytes_to_unicode():
@@ -33,6 +34,15 @@ def bytes_to_unicode():
     cs = [chr(n) for n in cs]
     return dict(zip(bs, cs))
 
+
+def count_model_parts(dir_model: str) -> int:
+    num_parts = 0
+    for filename in os.listdir(dir_model):
+        if filename.startswith("pytorch_model-"):
+            num_parts += 1
+
+    if num_parts > 0:
+        print("gguf: found " + str(num_parts) + " model parts")
+    return num_parts
+
 
 if len(sys.argv) < 3:
     print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
@@ -70,9 +80,8 @@ if hparams["architectures"][0] != "GPTNeoXForCausalLM":
     print("Model architecture not supported: " + hparams["architectures"][0] )
     sys.exit()
 
-model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True, trust_remote_code=True)
-list_vars = model.state_dict()
+# get number of model parts
+num_parts = count_model_parts(dir_model)
 
 gguf_writer = gguf.GGUFWriter.open(fname_out)
@@ -183,37 +192,58 @@ tensor_map = tmap.get_tensor_map(block_count)
 # tensor info
 print("gguf: get tensor metadata")
 
-for name in list_vars.keys():
-    data = list_vars[name].squeeze().numpy()
-
-    # we don't need these
-    if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
-        continue
-
-    # map tensor names
-    if name.endswith(".weight") and name[:-7] in tensor_map:
-        name = tensor_map[name[:-7]] + ".weight"
-    elif name.endswith(".bias") and name[:-5] in tensor_map:
-        name = tensor_map[name[:-5]] + ".bias"
-    else:
-        print( "Can not map tensor '" + name + "'" )
-        sys.exit()
-
-    n_dims = len(data.shape)
-    data_dtype = data.dtype
-
-    # print( name + " dims " + str(n_dims) + " dtype " + str(data.dtype) )
-    if data.dtype != np.float16 and data.dtype != np.float32:
-        # convert any unsupported data types to float32
-        data_dtype = np.float32
-    elif ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        data_dtype = np.float16
-
-    data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4
-
-    gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)
+if num_parts == 0:
+    part_names = ("pytorch_model.bin",)
+else:
+    part_names = (
+        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
+    )
+
+for part_name in part_names:
+    print("gguf: loading model part '"+ part_name + "'")
+    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
+
+    for name in model_part.keys():
+        data = model_part[name]
+
+        # we don't need these
+        if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
+            continue
+
+        # convert any unsupported data types to float32
+        if data.dtype != torch.float16 and data.dtype != torch.float32:
+            data = data.to(torch.float32)
+
+        data = data.squeeze().numpy()
+
+        # map tensor names
+        if name.endswith(".weight") and name[:-7] in tensor_map:
+            name = tensor_map[name[:-7]] + ".weight"
+        elif name.endswith(".bias") and name[:-5] in tensor_map:
+            name = tensor_map[name[:-5]] + ".bias"
+        else:
+            print( "Can not map tensor '" + name + "'" )
+            sys.exit()
+
+        n_dims = len(data.shape)
+        data_dtype = data.dtype
+
+        # if f32 desired, convert any float16 to float32
+        if ftype == 0 and data.dtype == np.float16:
+            data_dtype = np.float32
+
+        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+        if ftype == 1 and data.dtype == np.float16 and n_dims == 1:
+            data_dtype = np.float32
+
+        # if f16 desired, convert any float32 2-dim weight tensors to float16
+        if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+            data_dtype = np.float16
+
+        data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4
+
+        gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)
 
 print("gguf: write header")
 gguf_writer.write_header_to_file()
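The new metadata pass resolves the checkpoint files up front: with `num_parts == 0` it falls back to the single `pytorch_model.bin`, otherwise a generator yields the numbered shard names. A standalone sketch of that naming logic with an illustrative `num_parts` value:

```python
# Illustrative value; in the script num_parts comes from count_model_parts().
num_parts = 2

if num_parts == 0:
    part_names = ("pytorch_model.bin",)
else:
    part_names = (
        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
    )

for part_name in part_names:
    print(part_name)
# pytorch_model-00001-of-00002.bin
# pytorch_model-00002-of-00002.bin
```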
@@ -225,24 +255,59 @@ gguf_writer.write_ti_data_to_file()
 # tensor data
 print("gguf: convert and write tensor data")
 
-for name in list_vars.keys():
-    data = list_vars[name].squeeze().numpy()
-
-    # we don't need these
-    if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
-        continue
-
-    n_dims = len(data.shape)
-    data_dtype = data.dtype
-
-    if data_dtype != np.float16 and data_dtype != np.float32:
-        # convert any unsupported data types to float32
-        data = data.astype(np.float32)
-    elif ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        data = data.astype(np.float16)
-
-    gguf_writer.write_tensor_to_file(data)
+if num_parts == 0:
+    part_names = ("pytorch_model.bin",)
+else:
+    part_names = (
+        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
+    )
+
+for part_name in part_names:
+    print("gguf: loading model part '"+ part_name + "'")
+    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
+
+    for name in model_part.keys():
+        data = model_part[name]
+
+        old_dtype = data.dtype
+
+        # we don't need these
+        if name.endswith(".attention.masked_bias") or name.endswith(".attention.bias") or name.endswith(".attention.rotary_emb.inv_freq"):
+            continue
+
+        # convert any unsupported data types to float32
+        if data.dtype != torch.float16 and data.dtype != torch.float32:
+            data = data.to(torch.float32)
+
+        data = data.squeeze().numpy()
+
+        # map tensor names
+        if name.endswith(".weight") and name[:-7] in tensor_map:
+            name = tensor_map[name[:-7]] + ".weight"
+        elif name.endswith(".bias") and name[:-5] in tensor_map:
+            name = tensor_map[name[:-5]] + ".bias"
+        else:
+            print( "Can not map tensor '" + name + "'" )
+            sys.exit()
+
+        n_dims = len(data.shape)
+        data_dtype = data.dtype
+
+        # if f32 desired, convert any float16 to float32
+        if ftype == 0 and data.dtype == np.float16:
+            data = data.astype(np.float32)
+
+        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+            data = data.astype(np.float32)
+
+        # if f16 desired, convert any float32 2-dim weight tensors to float16
+        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+            data = data.astype(np.float16)
+
+        print( name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+
+        gguf_writer.write_tensor_to_file(data)
 
 gguf_writer.close()
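Both passes in this file apply the same dtype selection rules: anything that is neither float16 nor float32 is promoted to float32, `ftype == 0` (f32 output) upcasts float16, and `ftype == 1` (f16 output) stores only 2-dimensional `.weight` tensors as float16 while 1-dimensional float16 tensors are widened to float32. A condensed sketch of those rules; the helper name below is not part of the script:

```python
import numpy as np

def target_dtype(data: np.ndarray, name: str, ftype: int) -> np.dtype:
    # Hypothetical helper summarizing the dtype rules used in both loops above.
    n_dims = len(data.shape)
    if data.dtype not in (np.float16, np.float32):
        return np.dtype(np.float32)   # unsupported types are stored as float32
    if ftype == 0 and data.dtype == np.float16:
        return np.dtype(np.float32)   # f32 output: upcast everything
    if ftype == 1 and data.dtype == np.float16 and n_dims == 1:
        return np.dtype(np.float32)   # f16 output: 1-D tensors still go to float32
    if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
        return np.dtype(np.float16)   # f16 output: 2-D weight matrices become float16
    return np.dtype(data.dtype)

# Example: a 2-D weight matrix with f16 output requested (the tensor name is hypothetical).
w = np.zeros((8, 8), dtype=np.float32)
print(target_dtype(w, "blk.0.example.weight", ftype=1))  # float16
```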

View File

@@ -1,4 +1,4 @@
-# Quick and dirty HF llama --> gguf conversion, GQA/70b wont work
+# HF llama --> gguf conversion, GQA/70b not supported
 import gguf
 import gguf_tensor_map as tmap
@@ -9,7 +9,7 @@ import json
 import numpy as np
 from typing import Any, List
 from pathlib import Path
-from transformers import AutoModelForCausalLM
+import torch
 from sentencepiece import SentencePieceProcessor
@@ -23,6 +23,15 @@ def permute(weights: NDArray, n_head: int) -> NDArray:
             .swapaxes(1, 2)
             .reshape(weights.shape))
 
+
+def count_model_parts(dir_model: str) -> int:
+    num_parts = 0
+    for filename in os.listdir(dir_model):
+        if filename.startswith("pytorch_model-"):
+            num_parts += 1
+
+    if num_parts > 0:
+        print("gguf: found " + str(num_parts) + " model parts")
+    return num_parts
+
 
 if len(sys.argv) < 3:
    print("Usage: convert-h5-to-ggml.py dir-model ftype\n")
@@ -61,8 +70,8 @@ if hparams["architectures"][0] != "LlamaForCausalLM":
     print("Model architecture not supported: " + hparams["architectures"][0])
     sys.exit()
 
-model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True, trust_remote_code=True)
-list_vars = model.state_dict()
+# get number of model parts
+num_parts = count_model_parts(dir_model)
 
 gguf_writer = gguf.GGUFWriter.open(fname_out)
@@ -170,41 +179,61 @@ tensor_map = tmap.get_tensor_map(block_count)
 # tensor info
 print("gguf: get tensor metadata")
 
-for name in list_vars.keys():
-    data = list_vars[name].squeeze().numpy()
-
-    # we don't need these
-    if name.endswith(".rotary_emb.inv_freq"):
-        continue
-
-    # permute these
-    if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
-        data = permute(data, head_count)
-
-    # map tensor names
-    if name.endswith(".weight") and name[:-7] in tensor_map:
-        name = tensor_map[name[:-7]] + ".weight"
-    elif name.endswith(".bias") and name[:-5] in tensor_map:
-        name = tensor_map[name[:-5]] + ".bias"
-    else:
-        print("Can not map tensor '" + name + "'")
-        sys.exit()
-
-    n_dims = len(data.shape)
-    data_dtype = data.dtype
-
-    # print( name + " dims " + str(n_dims) + " dtype " + str(data.dtype) )
-    if data.dtype != np.float16 and data.dtype != np.float32:
-        # convert any unsupported data types to float32
-        data_dtype = np.float32
-    elif ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        data_dtype = np.float16
-
-    data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4
-
-    gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)
+if num_parts == 0:
+    part_names = ("pytorch_model.bin",)
+else:
+    part_names = (
+        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
+    )
+
+for part_name in part_names:
+    print("gguf: loading model part '"+ part_name + "'")
+    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
+
+    for name in model_part.keys():
+        data = model_part[name]
+
+        # we don't need these
+        if name.endswith(".rotary_emb.inv_freq"):
+            continue
+
+        # convert any unsupported data types to float32
+        if data.dtype != torch.float16 and data.dtype != torch.float32:
+            data = data.to(torch.float32)
+
+        data = data.squeeze().numpy()
+
+        # permute these
+        if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
+            data = permute(data, head_count)
+
+        # map tensor names
+        if name.endswith(".weight") and name[:-7] in tensor_map:
+            name = tensor_map[name[:-7]] + ".weight"
+        elif name.endswith(".bias") and name[:-5] in tensor_map:
+            name = tensor_map[name[:-5]] + ".bias"
+        else:
+            print( "Can not map tensor '" + name + "'" )
+            sys.exit()
+
+        n_dims = len(data.shape)
+        data_dtype = data.dtype
+
+        # if f32 desired, convert any float16 to float32
+        if ftype == 0 and data.dtype == np.float16:
+            data_dtype = np.float32
+
+        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+            data_dtype = np.float32
+
+        # if f16 desired, convert any float32 2-dim weight tensors to float16
+        if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+            data_dtype = np.float16
+
+        data_nbytes = data.size * 2 if data_dtype == np.float16 else data.size * 4
+
+        gguf_writer.add_tensor_info(name, data.shape, data_dtype, data_nbytes)
 
 print("gguf: write header")
@@ -217,28 +246,68 @@ gguf_writer.write_ti_data_to_file()
 # tensor data
 print("gguf: convert and write tensor data")
 
-for name in list_vars.keys():
-    data = list_vars[name].squeeze().numpy()
-
-    # we don't need these
-    if name.endswith(".rotary_emb.inv_freq"):
-        continue
-
-    # permute these
-    if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
-        data = permute(data, head_count)
-
-<<<<<<< HEAD
-    n_dims = len(data.shape)
-    data_dtype = data.dtype
-=======
-    old_dtype = data.dtype
-
-    # we don't need these
-    if name.endswith(".rotary_emb.inv_freq"):
-        continue
->>>>>>> 17800cd80fec468411481dc34a51d42a936442f1
-
-    if data_dtype != np.float16 and data_dtype != np.float32:
-        # convert any unsupported data types to float32
-        data = data.astype(np.float32)
-    elif ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        data = data.astype(np.float16)
-
-    gguf_writer.write_tensor_to_file(data)
+if num_parts == 0:
+    part_names = ("pytorch_model.bin",)
+else:
+    part_names = (
+        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
+    )
+
+for part_name in part_names:
+    print("gguf: loading model part '"+ part_name + "'")
+    model_part = torch.load(f"{dir_model}/{part_name}", map_location="cpu")
+
+    for name in model_part.keys():
+        data = model_part[name]
+
+        old_dtype = data.dtype
+
+        # we don't need these
+        if name.endswith(".rotary_emb.inv_freq"):
+            continue
+
+        # convert any unsupported data types to float32
+        if data.dtype != torch.float16 and data.dtype != torch.float32:
+            data = data.to(torch.float32)
+
+        data = data.squeeze().numpy()
+
+        # permute these
+        if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"):
+            data = permute(data, head_count)
+
+        # map tensor names
+        if name.endswith(".weight") and name[:-7] in tensor_map:
+            name = tensor_map[name[:-7]] + ".weight"
+        elif name.endswith(".bias") and name[:-5] in tensor_map:
+            name = tensor_map[name[:-5]] + ".bias"
+        else:
+            print( "Can not map tensor '" + name + "'" )
+            sys.exit()
+
+        n_dims = len(data.shape)
+        data_dtype = data.dtype
+
+        # if f32 desired, convert any float16 to float32
+        if ftype == 0 and data.dtype == np.float16:
+            data = data.astype(np.float32)
+
+        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+            data = data.astype(np.float32)
+
+        # if f16 desired, convert any float32 2-dim weight tensors to float16
+        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+            data = data.astype(np.float16)
+
+        print( name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+
+        gguf_writer.write_tensor_to_file(data)
 
 gguf_writer.close()