mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-07 11:23:56 +01:00
gguf : start implementing quantization (WIP)
This commit is contained in:
parent
c4f02b4f74
commit
b2571af255
2
Makefile
2
Makefile
@ -393,7 +393,7 @@ $(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-in
|
|||||||
embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput
|
$(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput
|
||||||
|
|
||||||
gguf: examples/gguf/gguf.cpp build-info.h ggml.o $(OBJS)
|
gguf: examples/gguf/gguf.cpp build-info.h ggml.o gguf-llama.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
gguf-llama-simple: examples/gguf/gguf-llama-simple.cpp build-info.h ggml.o gguf-llama.o common.o $(OBJS)
|
gguf-llama-simple: examples/gguf/gguf-llama-simple.cpp build-info.h ggml.o gguf-llama.o common.o $(OBJS)
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "gguf-util.h"
|
#include "gguf-util.h"
|
||||||
|
#include "gguf-llama.h"
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
@ -7,14 +8,14 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
/*
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static std::string to_string(const T & val) {
|
static std::string to_string(const T & val) {
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << val;
|
ss << val;
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
|
void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
|
||||||
const int32_t n = val.size();
|
const int32_t n = val.size();
|
||||||
fout.write((const char *) &n, sizeof(n));
|
fout.write((const char *) &n, sizeof(n));
|
||||||
@ -414,7 +415,7 @@ int main(int argc, char ** argv) {
|
|||||||
const std::string fname(argv[1]);
|
const std::string fname(argv[1]);
|
||||||
const std::string mode (argv[2]);
|
const std::string mode (argv[2]);
|
||||||
|
|
||||||
GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");
|
GGML_ASSERT((mode == "r" || mode == "w" || mode == "q") && "mode must be r, w or q");
|
||||||
|
|
||||||
if (mode == "w") {
|
if (mode == "w") {
|
||||||
GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
|
GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
|
||||||
@ -422,6 +423,9 @@ int main(int argc, char ** argv) {
|
|||||||
GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
|
GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
|
||||||
GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
|
GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
|
||||||
GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
|
GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
|
||||||
|
} else if (mode == "q") {
|
||||||
|
llama_model_quantize_params params = llama_model_quantize_default_params();
|
||||||
|
llama_model_quantize(fname.c_str(), "quant.gguf", &params);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -738,15 +738,19 @@ struct gguf_file_saver {
|
|||||||
info_offset = file.tell();
|
info_offset = file.tell();
|
||||||
size_t count = gguf_get_data_offset(fl->gguf_ctx) - info_offset;
|
size_t count = gguf_get_data_offset(fl->gguf_ctx) - info_offset;
|
||||||
file.write_zeros(count);
|
file.write_zeros(count);
|
||||||
|
printf("info_offset = %zu\n", info_offset);
|
||||||
|
file.seek(info_offset, SEEK_SET);
|
||||||
|
GGML_ASSERT(info_offset == file.tell());
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t write_tensor_info(llama_load_tensor & tensor) {
|
size_t write_tensor_info(llama_load_tensor & tensor) {
|
||||||
size_t total_written = 0;
|
size_t total_written = 0;
|
||||||
file.seek(0, info_offset);
|
file.seek(info_offset, SEEK_SET);
|
||||||
|
GGML_ASSERT(info_offset == file.tell());
|
||||||
total_written += file.write_str(tensor.name);
|
total_written += file.write_str(tensor.name);
|
||||||
|
|
||||||
int32_t n_dims = tensor.ne.size();
|
int32_t n_dims = tensor.ne.size();
|
||||||
file.write_i32(n_dims);
|
total_written += file.write_i32(n_dims);
|
||||||
for (int32_t i = 0; i < n_dims; ++i) {
|
for (int32_t i = 0; i < n_dims; ++i) {
|
||||||
total_written += file.write_i32(i);
|
total_written += file.write_i32(i);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user