commit c172b322c2
parent d8f2da6b9f

cont

ggml-ci
@@ -287,7 +287,7 @@ function gg_run_open_llama_7b_v2
     (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
     (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

-    python3 ../examples/convert-legacy-llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
+    python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf

     model_f16="${path_models}/ggml-model-f16.gguf"
     model_q8_0="${path_models}/ggml-model-q8_0.gguf"
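The only change in this hunk is that the CI job now calls the renamed examples/convert_legacy_llama.py instead of the old dash-named script; the build and model-path context around it is untouched. As a minimal sketch of that same conversion step driven from Python (the model directory below is a hypothetical placeholder; `--outfile` is the flag already used in the hunk):

```python
# Minimal sketch of the conversion step run in the hunk above.
# "path/to/open-llama-7b-v2" is a hypothetical checkout directory; --outfile
# matches the flag the script is invoked with above.
import subprocess
from pathlib import Path

path_models = Path("path/to/open-llama-7b-v2")
model_f16 = path_models / "ggml-model-f16.gguf"

subprocess.run(
    ["python3", "examples/convert_legacy_llama.py", str(path_models),
     "--outfile", str(model_f16)],
    check=True,
)
```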
@@ -1161,7 +1161,7 @@ class FalconModel(Model):
         # So we rearrange them here,, so that we have n_head query weights
         # followed by n_head_kv key weights followed by n_head_kv value weights,
         # in contiguous fashion.
-        # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert_hf_to_gguf.py
+        # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-gguf.py

         if "query_key_value" in name:
             n_head = self.find_hparam(["num_attention_heads", "n_head"])
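The comments above describe why Falcon's fused query_key_value weights are reordered: all n_head query heads first, then the n_head_kv key heads, then the n_head_kv value heads, contiguously. A rough numpy sketch of that rearrangement, assuming the fused layout is n_head_kv groups of (n_head // n_head_kv) query heads followed by one key head and one value head; the function and exact reshapes are illustrative, not the converter's own code:

```python
import numpy as np

def rearrange_falcon_qkv(w: np.ndarray, n_head: int, n_head_kv: int) -> np.ndarray:
    """Illustrative sketch: split a fused query_key_value weight into
    contiguous Q, K, V blocks, assuming n_head_kv groups of
    (n_head // n_head_kv) query heads + 1 key head + 1 value head."""
    hidden = w.shape[1]
    head_dim = hidden // n_head
    q_per_kv = n_head // n_head_kv

    # view as (kv groups, heads per group, rows per head, columns)
    grouped = w.reshape(n_head_kv, q_per_kv + 2, head_dim, hidden)
    q = grouped[:, :q_per_kv].reshape(n_head * head_dim, hidden)        # all query heads
    k = grouped[:, q_per_kv].reshape(n_head_kv * head_dim, hidden)      # all key heads
    v = grouped[:, q_per_kv + 1].reshape(n_head_kv * head_dim, hidden)  # all value heads
    return np.concatenate((q, k, v), axis=0)
```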
@@ -15,7 +15,7 @@
 # - Add a new model to the "models" list
 # - Run the script with your huggingface token:
 #
-#   python3 convert_hf_to_gguf-update.py <huggingface_token>
+#   python3 convert_hf_to_gguf_update.py <huggingface_token>
 #
 # - Copy-paste the generated get_vocab_base_pre() function into convert_hf_to_gguf.py
 # - Update llama.cpp with the new pre-tokenizer if necessary
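The workflow sketched in these comments hinges on fingerprinting each model's pre-tokenizer: the update script encodes a fixed probe string with the tokenizer and hashes the resulting token ids, and the generated get_vocab_base_pre() later matches that hash at convert time. A rough sketch of the fingerprinting idea, where the probe text and local tokenizer path are placeholders rather than the script's actual values:

```python
from hashlib import sha256
from transformers import AutoTokenizer

# Placeholder probe text and tokenizer path, for illustration only; the real
# probe string and download location come from convert_hf_to_gguf_update.py.
chktxt = "Hello world \n\t 3.14 example probe"
tokenizer = AutoTokenizer.from_pretrained("models/tokenizers/llama-bpe")

# fingerprint of the pre-tokenizer: hash of the token ids for the probe text
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
print(chkhsh)
```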
@@ -37,7 +37,7 @@ from enum import IntEnum, auto
 from transformers import AutoTokenizer

 logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger("convert_hf_to_gguf-update")
+logger = logging.getLogger("convert_hf_to_gguf_update")
 sess = requests.Session()


@@ -56,10 +56,10 @@ if len(sys.argv) == 2:
     token = sys.argv[1]
     if not token.startswith("hf_"):
         logger.info("Huggingface token seems invalid")
-        logger.info("Usage: python convert_hf_to_gguf-update.py <huggingface_token>")
+        logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
         sys.exit(1)
 else:
-    logger.info("Usage: python convert_hf_to_gguf-update.py <huggingface_token>")
+    logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
     sys.exit(1)

 # TODO: add models here, base models preferred
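Once the token passes the hf_ prefix check, the requests session created earlier is what performs authenticated downloads from Hugging Face. A hedged sketch of how such a token is typically attached to a request; the URL, file name, and header handling here are illustrative, not the script's exact download code:

```python
import requests

token = "hf_xxxxxxxxxxxxxxxx"   # in the script this comes from sys.argv[1]
sess = requests.Session()
headers = {"Authorization": f"Bearer {token}"}

# Illustrative download of a tokenizer file; the repository and path are placeholders.
url = "https://huggingface.co/some-org/some-model/resolve/main/tokenizer.json"
resp = sess.get(url, headers=headers)
resp.raise_for_status()
with open("tokenizer.json", "wb") as f:
    f.write(resp.content)
```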
@@ -201,7 +201,7 @@ src_func = f"""

         res = None

-        # NOTE: if you get an error here, you need to update the convert_hf_to_gguf-update.py script
+        # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
 {src_ifs}
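The {src_ifs} placeholder in this template is filled with one branch per known tokenizer, comparing the hash computed at convert time against the recorded values. A rough sketch of the shape of that generated code; the hash value and the failure handling below are placeholders:

```python
# Rough shape of what {src_ifs} expands into inside the generated
# get_vocab_base_pre(); the hash value is a stand-in, not a real checksum.
chkhsh = "0" * 64   # stands in for the hash computed from the probe text

res = None
if chkhsh == "0" * 64:
    # tokenizer.ggml.pre: "llama-bpe"
    res = "llama-bpe"

if res is None:
    # placeholder failure path; the real script directs you back to
    # convert_hf_to_gguf_update.py when the hash is not recognized
    raise NotImplementedError("BPE pre-tokenizer was not recognized")
```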
@@ -17,7 +17,7 @@ Also, it is important to check that the examples and main ggml backends (CUDA, M
 ### 1. Convert the model to GGUF

 This step is done in python with a `convert` script using the [gguf](https://pypi.org/project/gguf/) library.
-Depending on the model architecture, you can use either [convert_hf_to_gguf.py](../convert_hf_to_gguf.py) or [examples/convert-legacy-llama.py](../examples/convert-legacy-llama.py) (for `llama/llama2` models in `.pth` format).
+Depending on the model architecture, you can use either [convert_hf_to_gguf.py](../convert_hf_to_gguf.py) or [examples/convert_legacy_llama.py](../examples/convert_legacy_llama.py) (for `llama/llama2` models in `.pth` format).

 The convert script reads the model configuration, tokenizer, tensor names+data and converts them to GGUF metadata and tensors.

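To make the doc paragraph concrete: the model configuration and tokenizer end up as GGUF key/value metadata, and the weights end up as named tensors. A small inspection sketch, assuming the gguf package's GGUFReader interface with .fields and .tensors; the file path is a placeholder:

```python
from gguf import GGUFReader

# Assumed interface: GGUFReader exposes .fields (metadata key/values) and
# .tensors (objects with .name and .shape). The path is a placeholder.
reader = GGUFReader("ggml-model-f16.gguf")

for key in list(reader.fields)[:10]:
    print("kv:", key)

for tensor in reader.tensors[:10]:
    print("tensor:", tensor.name, tensor.shape)
```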