gguf : quantization is working
commit 202eab04d3 (parent 1fc3d30b71)
@@ -421,7 +421,7 @@ int main(int argc, char ** argv) {
         GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
     } else if (mode == "r") {
         GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
-        //GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
+        GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
         GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
     } else if (mode == "q") {
         llama_model_quantize_params params = llama_model_quantize_default_params();
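The hunk ends just as the default parameters are fetched. For orientation, a minimal sketch of how a "q" branch like this typically continues — the `nthread`/`ftype` values, the output filename, and the 0-on-success check are illustrative assumptions, not part of this commit:

```cpp
#include <string>
#include "ggml.h"
#include "llama.h"

// Sketch only: quantize the input model via llama.cpp's public API.
// Field names follow llama.h of this era; treat specifics as assumptions.
static void quantize_example(const std::string & fname) {
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.nthread = 4;                       // worker threads while quantizing
    params.ftype   = LLAMA_FTYPE_MOSTLY_Q4_0; // target quantization format

    const std::string fname_out = fname + ".q4_0.gguf"; // hypothetical output path
    GGML_ASSERT(llama_model_quantize(fname.c_str(), fname_out.c_str(), &params) == 0 &&
            "failed to quantize model");
}
```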
@@ -752,7 +752,6 @@ struct gguf_file_saver {
         file.seek(info_offset, SEEK_SET);
         GGML_ASSERT(info_offset == file.tell());
         total_written += file.write_str(tensor.name);
-printf("total_written = %zu, name = %s\n", total_written, tensor.name.c_str());
 
         int32_t n_dims = tensor.ne.size();
         total_written += file.write_i32(n_dims);
@@ -765,8 +764,7 @@ printf("total_written = %zu, name = %s\n", total_written, tensor.name.c_str());
         info_offset += total_written;
 
         file.seek(0, SEEK_END);
-printf("total_written = %zu\n", total_written);
 
         return total_written;
     }
 
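These two hunks only drop debug printf lines, but the surrounding context shows the saver's two-pass layout: it seeks back to `info_offset` to patch each tensor's metadata record in place, advances `info_offset` by the bytes written, then seeks to `SEEK_END` to continue appending tensor data. A minimal standalone sketch of that seek-back/patch/seek-end movement (plain stdio with illustrative names, not the actual `gguf_file` API):

```cpp
#include <cstdio>
#include <cstdint>

// Patch a fixed-size record at a known offset, then resume appending at
// the end of the file -- the same movement gguf_file_saver performs above.
void patch_then_append(std::FILE * fp, long info_offset, int32_t n_dims) {
    std::fseek(fp, info_offset, SEEK_SET);       // back to the tensor-info block
    std::fwrite(&n_dims, sizeof(n_dims), 1, fp); // overwrite the record in place
    std::fseek(fp, 0, SEEK_END);                 // continue appending tensor data
}
```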
@@ -936,8 +934,7 @@ struct llama_model_loader {
         } else {
             gguf_file & file = file_loader->file;
             file.seek(lt.file_off, SEEK_SET);
-            // TODO
-            //file.read_raw(lt.data, lt.size);
+            file.read_raw(lt.data, lt.size);
         }
 
         if (0) {
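With this change the loader actually populates the tensor: it seeks to the tensor's recorded file offset and reads `lt.size` bytes straight into the preallocated buffer. A standalone sketch of that load path, with the short-read check the one-liner omits (plain stdio, illustrative names, not the loader's API):

```cpp
#include <cstdio>
#include <cstddef>

// Read `size` bytes at `file_off` into a caller-owned buffer.
// Returns false on seek failure or short read.
bool load_tensor_data(std::FILE * fp, long file_off, void * data, std::size_t size) {
    if (std::fseek(fp, file_off, SEEK_SET) != 0) {
        return false; // offset past end of file, or stream error
    }
    return std::fread(data, 1, size, fp) == size; // short read => truncated file
}
```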
@@ -131,6 +131,10 @@ struct gguf_file {
         fwrite(data, size, 1, fp);
     }
 
+    void read_raw(void * data, size_t size) {
+        fread(data, size, 1, fp);
+    }
+
     template<typename T>
     void write_val(const std::string & key, enum gguf_type type, const T & val) {
         write_str(key);
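The new `read_raw` mirrors `write_raw`, ignoring `fread`'s return value in the same style as the existing code. The context lines also show the GGUF key/value convention: `write_val` emits the key string first, then the type id, then the raw value. A minimal standalone sketch of that KV layout (stdio only; the 32-bit length prefix is an assumption for this era of the format, not something this diff confirms):

```cpp
#include <cstdio>
#include <cstdint>
#include <string>

// Write one GGUF-style KV pair: length-prefixed key, type id, raw value.
// Sketch of the convention only, not the gguf_file implementation.
template<typename T>
void write_kv(std::FILE * fp, const std::string & key, int32_t type, const T & val) {
    const uint32_t n = (uint32_t) key.size();
    std::fwrite(&n, sizeof(n), 1, fp);       // key length (assumed 32-bit here)
    std::fwrite(key.data(), 1, n, fp);       // key bytes, no NUL terminator
    std::fwrite(&type, sizeof(type), 1, fp); // enum gguf_type as int32
    std::fwrite(&val, sizeof(val), 1, fp);   // raw little-endian scalar value
}
```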