mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-14 06:19:02 +01:00
convert_hf : faster lazy safetensors
This commit is contained in:
parent
aaab2419ea
commit
7cda4dd7e9
@ -148,7 +148,14 @@ class Model:
|
|||||||
tensor_names_from_parts.update(model_part.keys())
|
tensor_names_from_parts.update(model_part.keys())
|
||||||
|
|
||||||
for name in model_part.keys():
|
for name in model_part.keys():
|
||||||
data = model_part.get_tensor(name) if self.is_safetensors else model_part[name]
|
if self.is_safetensors:
|
||||||
|
if self.lazy:
|
||||||
|
data = model_part.get_slice(name)
|
||||||
|
data = LazyTorchTensor.from_safetensors_slice(data)
|
||||||
|
else:
|
||||||
|
data = model_part.get_tensor(name)
|
||||||
|
else:
|
||||||
|
data = model_part[name]
|
||||||
if self.lazy:
|
if self.lazy:
|
||||||
data = LazyTorchTensor.from_eager(data)
|
data = LazyTorchTensor.from_eager(data)
|
||||||
yield name, data
|
yield name, data
|
||||||
@ -3435,6 +3442,27 @@ class LazyTorchTensor(gguf.LazyBase):
|
|||||||
torch.float32: np.float32,
|
torch.float32: np.float32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# used for safetensors slices
|
||||||
|
# ref: https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/src/lib.rs#L1046
|
||||||
|
# TODO: uncomment U64, U32, and U16, ref: https://github.com/pytorch/pytorch/issues/58734
|
||||||
|
_dtype_str_map: dict[str, torch.dtype] = {
|
||||||
|
"F64": torch.float64,
|
||||||
|
"F32": torch.float32,
|
||||||
|
"BF16": torch.bfloat16,
|
||||||
|
"F16": torch.float16,
|
||||||
|
# "U64": torch.uint64,
|
||||||
|
"I64": torch.int64,
|
||||||
|
# "U32": torch.uint32,
|
||||||
|
"I32": torch.int32,
|
||||||
|
# "U16": torch.uint16,
|
||||||
|
"I16": torch.int16,
|
||||||
|
"U8": torch.uint8,
|
||||||
|
"I8": torch.int8,
|
||||||
|
"BOOL": torch.bool,
|
||||||
|
"F8_E4M3": torch.float8_e4m3fn,
|
||||||
|
"F8_E5M2": torch.float8_e5m2,
|
||||||
|
}
|
||||||
|
|
||||||
def numpy(self) -> gguf.LazyNumpyTensor:
|
def numpy(self) -> gguf.LazyNumpyTensor:
|
||||||
dtype = self._dtype_map[self.dtype]
|
dtype = self._dtype_map[self.dtype]
|
||||||
return gguf.LazyNumpyTensor(
|
return gguf.LazyNumpyTensor(
|
||||||
@ -3448,6 +3476,13 @@ class LazyTorchTensor(gguf.LazyBase):
|
|||||||
def meta_with_dtype_and_shape(cls, dtype: torch.dtype, shape: torch.Size) -> Tensor:
|
def meta_with_dtype_and_shape(cls, dtype: torch.dtype, shape: torch.Size) -> Tensor:
|
||||||
return torch.empty(size=shape, dtype=dtype, device="meta")
|
return torch.empty(size=shape, dtype=dtype, device="meta")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_safetensors_slice(cls, st_slice: Any) -> Tensor:
|
||||||
|
dtype = cls._dtype_str_map[st_slice.get_dtype()]
|
||||||
|
shape = st_slice.get_shape()
|
||||||
|
lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(st_slice,), func=lambda s: s[0][:])
|
||||||
|
return cast(torch.Tensor, lazy)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __torch_function__(cls, func, types, args=(), kwargs=None):
|
def __torch_function__(cls, func, types, args=(), kwargs=None):
|
||||||
del types # unused
|
del types # unused
|
||||||
|
@ -602,13 +602,11 @@ class TensorNameMap:
|
|||||||
for tensor, keys in self.block_mappings_cfg.items():
|
for tensor, keys in self.block_mappings_cfg.items():
|
||||||
if tensor not in MODEL_TENSORS[arch]:
|
if tensor not in MODEL_TENSORS[arch]:
|
||||||
continue
|
continue
|
||||||
# TODO: make this configurable
|
|
||||||
n_experts = 160
|
tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
|
||||||
for xid in range(n_experts):
|
|
||||||
tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
|
|
||||||
self.mapping[tensor_name] = (tensor, tensor_name)
|
self.mapping[tensor_name] = (tensor, tensor_name)
|
||||||
for key in keys:
|
for key in keys:
|
||||||
key = key.format(bid = bid, xid = xid)
|
key = key.format(bid = bid)
|
||||||
self.mapping[key] = (tensor, tensor_name)
|
self.mapping[key] = (tensor, tensor_name)
|
||||||
|
|
||||||
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
|
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
|
||||||
|
Loading…
Reference in New Issue
Block a user