mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 12:21:40 +01:00
build : enable more non-default compiler warnings (#3200)
This commit is contained in:
parent
0ccfc62a96
commit
bc39553c90
1
.gitignore
vendored
1
.gitignore
vendored
@ -45,6 +45,7 @@ models-mnt
|
||||
/main
|
||||
/metal
|
||||
/perplexity
|
||||
/q8dot
|
||||
/quantize
|
||||
/quantize-stats
|
||||
/result
|
||||
|
@ -414,37 +414,38 @@ endif()
|
||||
|
||||
if (LLAMA_ALL_WARNINGS)
|
||||
if (NOT MSVC)
|
||||
set(c_flags
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wpedantic
|
||||
-Wcast-qual
|
||||
-Wdouble-promotion
|
||||
-Wshadow
|
||||
-Wstrict-prototypes
|
||||
-Wpointer-arith
|
||||
-Wmissing-prototypes
|
||||
-Werror=implicit-int
|
||||
-Wno-unused-function
|
||||
)
|
||||
set(cxx_flags
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wpedantic
|
||||
-Wcast-qual
|
||||
-Wmissing-declarations
|
||||
-Wno-unused-function
|
||||
-Wno-multichar
|
||||
)
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
# g++ only
|
||||
set(cxx_flags ${cxx_flags} -Wno-format-truncation -Wno-array-bounds)
|
||||
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
||||
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
|
||||
-Werror=implicit-function-declaration)
|
||||
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
|
||||
set(cxx_flags ${cxx_flags} -Wmissing-prototypes -Wextra-semi)
|
||||
|
||||
if (
|
||||
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
|
||||
(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0)
|
||||
)
|
||||
set(c_flags ${c_flags} -Wdouble-promotion)
|
||||
endif()
|
||||
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
||||
set(c_flags ${c_flags} -Wdouble-promotion)
|
||||
set(cxx_flags ${cxx_flags} -Wno-array-bounds)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
|
||||
set(cxx_flags ${cxx_flags} -Wno-format-truncation)
|
||||
endif()
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
|
||||
set(cxx_flags ${cxx_flags} -Wextra-semi)
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
# todo : msvc
|
||||
endif()
|
||||
|
||||
add_compile_options(
|
||||
${warning_flags}
|
||||
"$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
||||
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
|
||||
)
|
||||
|
69
Makefile
69
Makefile
@ -1,5 +1,5 @@
|
||||
# Define the default target now so that it is always the first target
|
||||
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative parallel finetune export-lora tests/test-c.o
|
||||
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative benchmark-matmult parallel finetune export-lora tests/test-c.o
|
||||
|
||||
# Binaries only useful for tests
|
||||
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
|
||||
@ -19,6 +19,20 @@ ifndef UNAME_M
|
||||
UNAME_M := $(shell uname -m)
|
||||
endif
|
||||
|
||||
ifeq '' '$(findstring clang,$(shell $(CC) --version))'
|
||||
CC_IS_GCC=1
|
||||
CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
|
||||
else
|
||||
CC_IS_CLANG=1
|
||||
ifeq '' '$(findstring Apple LLVM,$(shell $(CC) --version))'
|
||||
CC_IS_LLVM_CLANG=1
|
||||
else
|
||||
CC_IS_APPLE_CLANG=1
|
||||
endif
|
||||
CC_VER := $(shell $(CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
|
||||
| awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
|
||||
endif
|
||||
|
||||
# Mac OS + Arm can report x86_64
|
||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
@ -87,9 +101,6 @@ CC := riscv64-unknown-linux-gnu-gcc
|
||||
CXX := riscv64-unknown-linux-gnu-g++
|
||||
endif
|
||||
|
||||
CCV := $(shell $(CC) --version | head -n 1)
|
||||
CXXV := $(shell $(CXX) --version | head -n 1)
|
||||
|
||||
#
|
||||
# Compile flags
|
||||
#
|
||||
@ -173,20 +184,33 @@ ifdef LLAMA_DISABLE_LOGS
|
||||
endif # LLAMA_DISABLE_LOGS
|
||||
|
||||
# warnings
|
||||
MK_CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
|
||||
-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
|
||||
MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
|
||||
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
||||
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
|
||||
-Werror=implicit-function-declaration
|
||||
MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
|
||||
|
||||
# TODO(cebtenzzre): remove this once PR #2632 gets merged
|
||||
TTFS_CXXFLAGS = $(CXXFLAGS) -Wno-missing-declarations
|
||||
ifeq ($(CC_IS_CLANG), 1)
|
||||
# clang options
|
||||
MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return
|
||||
MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
|
||||
|
||||
ifneq '' '$(findstring clang,$(shell $(CXX) --version))'
|
||||
# clang++ only
|
||||
MK_CXXFLAGS += -Wmissing-prototypes
|
||||
TTFS_CXXFLAGS += -Wno-missing-prototypes
|
||||
ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
|
||||
MK_CFLAGS += -Wdouble-promotion
|
||||
endif
|
||||
ifneq '' '$(and $(CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 070300)))'
|
||||
MK_CFLAGS += -Wdouble-promotion
|
||||
endif
|
||||
else
|
||||
# g++ only
|
||||
MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds
|
||||
# gcc options
|
||||
MK_CFLAGS += -Wdouble-promotion
|
||||
MK_HOST_CXXFLAGS += -Wno-array-bounds
|
||||
|
||||
ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
|
||||
MK_HOST_CXXFLAGS += -Wno-format-truncation
|
||||
endif
|
||||
ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
|
||||
MK_HOST_CXXFLAGS += -Wextra-semi
|
||||
endif
|
||||
endif
|
||||
|
||||
# OS specific
|
||||
@ -382,7 +406,7 @@ ifdef LLAMA_CUDA_CCBIN
|
||||
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||
endif
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||
$(NVCC) $(NVCCFLAGS) -Wno-pedantic -c $< -o $@
|
||||
$(NVCC) $(NVCCFLAGS) -c $< -o $@
|
||||
endif # LLAMA_CUBLAS
|
||||
|
||||
ifdef LLAMA_CLBLAST
|
||||
@ -472,8 +496,8 @@ $(info I CFLAGS: $(CFLAGS))
|
||||
$(info I CXXFLAGS: $(CXXFLAGS))
|
||||
$(info I NVCCFLAGS: $(NVCCFLAGS))
|
||||
$(info I LDFLAGS: $(LDFLAGS))
|
||||
$(info I CC: $(CCV))
|
||||
$(info I CXX: $(CXXV))
|
||||
$(info I CC: $(shell $(CC) --version | head -n 1))
|
||||
$(info I CXX: $(shell $(CXX) --version | head -n 1))
|
||||
$(info )
|
||||
|
||||
#
|
||||
@ -554,7 +578,7 @@ gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o train.o $(OBJS)
|
||||
$(CXX) $(TTFS_CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
@ -601,11 +625,18 @@ tests: $(TEST_TARGETS)
|
||||
|
||||
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
run-benchmark-matmult: benchmark-matmult
|
||||
./$@
|
||||
|
||||
.PHONY: run-benchmark-matmult
|
||||
|
||||
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
|
||||
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
|
||||
tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o common.o grammar-parser.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
|
@ -755,10 +755,9 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
|
||||
case 7: return "He";
|
||||
case 8: return "She";
|
||||
case 9: return "They";
|
||||
default: return "To";
|
||||
}
|
||||
|
||||
return "The";
|
||||
GGML_UNREACHABLE();
|
||||
}
|
||||
|
||||
//
|
||||
|
74
common/log.h
74
common/log.h
@ -225,31 +225,31 @@ enum LogTriState
|
||||
// USE LOG() INSTEAD
|
||||
//
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_IMPL(str, ...) \
|
||||
{ \
|
||||
#define LOG_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
} \
|
||||
}
|
||||
} while (0)
|
||||
#else
|
||||
#define LOG_IMPL(str, ...) \
|
||||
{ \
|
||||
#define LOG_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
||||
fflush(LOG_TARGET); \
|
||||
} \
|
||||
}
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
// USE LOG_TEE() INSTEAD
|
||||
//
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_TEE_IMPL(str, ...) \
|
||||
{ \
|
||||
#define LOG_TEE_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
|
||||
@ -260,10 +260,10 @@ enum LogTriState
|
||||
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL, __VA_ARGS__); \
|
||||
fflush(LOG_TEE_TARGET); \
|
||||
} \
|
||||
}
|
||||
} while (0)
|
||||
#else
|
||||
#define LOG_TEE_IMPL(str, ...) \
|
||||
{ \
|
||||
#define LOG_TEE_IMPL(str, ...) \
|
||||
do { \
|
||||
if (LOG_TARGET != nullptr) \
|
||||
{ \
|
||||
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
|
||||
@ -274,7 +274,7 @@ enum LogTriState
|
||||
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL "", ##__VA_ARGS__); \
|
||||
fflush(LOG_TEE_TARGET); \
|
||||
} \
|
||||
}
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
// The '\0' as a last argument, is a trick to bypass the silly
|
||||
@ -435,41 +435,41 @@ inline FILE *log_handler() { return log_handler1_impl(); }
|
||||
inline void log_test()
|
||||
{
|
||||
log_disable();
|
||||
LOG("01 Hello World to nobody, because logs are disabled!\n")
|
||||
LOG("01 Hello World to nobody, because logs are disabled!\n");
|
||||
log_enable();
|
||||
LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET))
|
||||
LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n")
|
||||
LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET));
|
||||
LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n");
|
||||
log_set_target(stderr);
|
||||
LOG("04 Hello World to stderr!\n")
|
||||
LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n")
|
||||
LOG("04 Hello World to stderr!\n");
|
||||
LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n");
|
||||
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||
LOG("06 Hello World to default log file!\n")
|
||||
LOG("06 Hello World to default log file!\n");
|
||||
log_set_target(stdout);
|
||||
LOG("07 Hello World to stdout!\n")
|
||||
LOG("07 Hello World to stdout!\n");
|
||||
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||
LOG("08 Hello World to default log file again!\n")
|
||||
LOG("08 Hello World to default log file again!\n");
|
||||
log_disable();
|
||||
LOG("09 Hello World _1_ into the void!\n")
|
||||
LOG("09 Hello World _1_ into the void!\n");
|
||||
log_enable();
|
||||
LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n")
|
||||
LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n");
|
||||
log_disable();
|
||||
log_set_target("llama.anotherlog.log");
|
||||
LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n")
|
||||
LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n");
|
||||
log_enable();
|
||||
LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n")
|
||||
LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n");
|
||||
log_set_target("llama.yetanotherlog.log");
|
||||
LOG("13 Hello World this time in yet new file?\n")
|
||||
LOG("13 Hello World this time in yet new file?\n");
|
||||
log_set_target(log_filename_generator("llama_autonamed", "log"));
|
||||
LOG("14 Hello World in log with generated filename!\n")
|
||||
LOG("14 Hello World in log with generated filename!\n");
|
||||
#ifdef _MSC_VER
|
||||
LOG_TEE("15 Hello msvc TEE without arguments\n")
|
||||
LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test")
|
||||
LOG_TEELN("17 Hello msvc TEELN without arguments\n")
|
||||
LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test")
|
||||
LOG("19 Hello msvc LOG without arguments\n")
|
||||
LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test")
|
||||
LOGLN("21 Hello msvc LOGLN without arguments\n")
|
||||
LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test")
|
||||
LOG_TEE("15 Hello msvc TEE without arguments\n");
|
||||
LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test");
|
||||
LOG_TEELN("17 Hello msvc TEELN without arguments\n");
|
||||
LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test");
|
||||
LOG("19 Hello msvc LOG without arguments\n");
|
||||
LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test");
|
||||
LOGLN("21 Hello msvc LOGLN without arguments\n");
|
||||
LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test");
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -542,7 +542,7 @@ inline void log_dump_cmdline_impl(int argc, char **argv)
|
||||
buf << " " << argv[i];
|
||||
}
|
||||
}
|
||||
LOGLN("Cmd:%s", buf.str().c_str())
|
||||
LOGLN("Cmd:%s", buf.str().c_str());
|
||||
}
|
||||
|
||||
#define log_tostr(var) log_var_to_string_impl(var).c_str()
|
||||
@ -620,10 +620,10 @@ inline std::string log_var_to_string_impl(const std::vector<int> & var)
|
||||
#define LOGLN(...) // dummy stub
|
||||
|
||||
#undef LOG_TEE
|
||||
#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__); // convert to normal fprintf
|
||||
#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
|
||||
|
||||
#undef LOG_TEELN
|
||||
#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__); // convert to normal fprintf
|
||||
#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
|
||||
|
||||
#undef LOG_DISABLE
|
||||
#define LOG_DISABLE() // dummy stub
|
||||
|
@ -1,9 +1,12 @@
|
||||
#include "ggml.h"
|
||||
#include "train.h"
|
||||
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <random>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <random>
|
||||
#include <vector>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
@ -64,7 +67,7 @@ static struct ggml_tensor * randomize_tensor(
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
};
|
||||
}
|
||||
|
||||
return tensor;
|
||||
}
|
||||
@ -389,7 +392,7 @@ static void randomize_model_lora(
|
||||
free_random_normal_distribution(rnd);
|
||||
}
|
||||
|
||||
static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
|
||||
static void init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
|
||||
const auto & hparams = model->hparams;
|
||||
|
||||
const uint32_t n_ctx = hparams.n_ctx;
|
||||
@ -415,14 +418,12 @@ static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * mod
|
||||
|
||||
if (!cache->ctx) {
|
||||
fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
|
||||
return false;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
cache->k = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
|
||||
cache->v = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) {
|
||||
|
@ -655,9 +655,9 @@ struct printer {
|
||||
virtual ~printer() {}
|
||||
|
||||
FILE * fout;
|
||||
virtual void print_header(const cmd_params & params) { (void) params; };
|
||||
virtual void print_header(const cmd_params & params) { (void) params; }
|
||||
virtual void print_test(const test & t) = 0;
|
||||
virtual void print_footer() { };
|
||||
virtual void print_footer() { }
|
||||
};
|
||||
|
||||
struct csv_printer : public printer {
|
||||
|
@ -852,7 +852,7 @@ int main(int argc, char ** argv) {
|
||||
llama_backend_free();
|
||||
|
||||
#ifndef LOG_DISABLE_LOGS
|
||||
LOG_TEE("Log end\n")
|
||||
LOG_TEE("Log end\n");
|
||||
#endif // LOG_DISABLE_LOGS
|
||||
|
||||
return 0;
|
||||
|
@ -72,6 +72,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
|
||||
// usage:
|
||||
// ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
|
||||
//
|
||||
[[noreturn]]
|
||||
static void usage(const char * executable) {
|
||||
printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
|
||||
printf(" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
|
||||
|
@ -483,7 +483,7 @@ static struct ggml_tensor * llama_build_train_graphs(
|
||||
}
|
||||
|
||||
#define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
|
||||
{ \
|
||||
do { \
|
||||
const std::string skey(key); \
|
||||
const int kid = gguf_find_key(ctx, skey.c_str()); \
|
||||
if (kid >= 0) { \
|
||||
@ -495,7 +495,7 @@ static struct ggml_tensor * llama_build_train_graphs(
|
||||
} else if (req) { \
|
||||
die_fmt("key not found in model: %s", skey.c_str()); \
|
||||
} \
|
||||
}
|
||||
} while (0)
|
||||
|
||||
static void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model) {
|
||||
// NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read
|
||||
@ -786,7 +786,7 @@ struct train_params {
|
||||
float rope_freq_scale;
|
||||
};
|
||||
|
||||
struct train_params get_default_train_params() {
|
||||
static struct train_params get_default_train_params() {
|
||||
struct train_params params;
|
||||
params.common = get_default_train_params_common();
|
||||
params.fn_vocab_model = "ggml-vic7b-uncensored-q4_0.bin";
|
||||
|
288
ggml.c
288
ggml.c
@ -245,18 +245,18 @@ inline static void * ggml_aligned_malloc(size_t size) {
|
||||
//
|
||||
|
||||
#define GGML_TENSOR_UNARY_OP_LOCALS \
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
|
||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||
|
||||
#define GGML_TENSOR_BINARY_OP_LOCALS \
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
|
||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
|
||||
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); \
|
||||
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb); \
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
||||
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
||||
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||
|
||||
#if defined(GGML_USE_ACCELERATE)
|
||||
#include <Accelerate/Accelerate.h>
|
||||
@ -1866,7 +1866,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
||||
#define GGML_F16x8_ADD vaddq_f16
|
||||
#define GGML_F16x8_MUL vmulq_f16
|
||||
#define GGML_F16x8_REDUCE(res, x) \
|
||||
{ \
|
||||
do { \
|
||||
int offset = GGML_F16_ARR >> 1; \
|
||||
for (int i = 0; i < offset; ++i) { \
|
||||
x[i] = vaddq_f16(x[i], x[offset+i]); \
|
||||
@ -1882,7 +1882,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
||||
const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 (x[0])); \
|
||||
const float32x4_t t1 = vcvt_f32_f16(vget_high_f16(x[0])); \
|
||||
res = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); \
|
||||
}
|
||||
} while (0)
|
||||
|
||||
#define GGML_F16_VEC GGML_F16x8
|
||||
#define GGML_F16_VEC_ZERO GGML_F16x8_ZERO
|
||||
@ -1943,7 +1943,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
||||
#define GGML_F32x8_ADD _mm256_add_ps
|
||||
#define GGML_F32x8_MUL _mm256_mul_ps
|
||||
#define GGML_F32x8_REDUCE(res, x) \
|
||||
{ \
|
||||
do { \
|
||||
int offset = GGML_F32_ARR >> 1; \
|
||||
for (int i = 0; i < offset; ++i) { \
|
||||
x[i] = _mm256_add_ps(x[i], x[offset+i]); \
|
||||
@ -1960,7 +1960,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
||||
_mm256_extractf128_ps(x[0], 1)); \
|
||||
const __m128 t1 = _mm_hadd_ps(t0, t0); \
|
||||
res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
|
||||
}
|
||||
} while (0)
|
||||
// TODO: is this optimal ?
|
||||
|
||||
#define GGML_F32_VEC GGML_F32x8
|
||||
@ -5154,31 +5154,31 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
|
||||
return ((int8_t *)(tensor->data))[i];
|
||||
} break;
|
||||
}
|
||||
case GGML_TYPE_I16:
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
|
||||
return ((int16_t *)(tensor->data))[i];
|
||||
} break;
|
||||
}
|
||||
case GGML_TYPE_I32:
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
|
||||
return ((int32_t *)(tensor->data))[i];
|
||||
} break;
|
||||
}
|
||||
case GGML_TYPE_F16:
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
||||
} break;
|
||||
}
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(float));
|
||||
return ((float *)(tensor->data))[i];
|
||||
} break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0f;
|
||||
@ -5228,29 +5228,17 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
|
||||
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
|
||||
switch (tensor->type) {
|
||||
case GGML_TYPE_I8:
|
||||
{
|
||||
return ((int8_t *) data)[0];
|
||||
} break;
|
||||
return ((int8_t *) data)[0];
|
||||
case GGML_TYPE_I16:
|
||||
{
|
||||
return ((int16_t *) data)[0];
|
||||
} break;
|
||||
return ((int16_t *) data)[0];
|
||||
case GGML_TYPE_I32:
|
||||
{
|
||||
return ((int32_t *) data)[0];
|
||||
} break;
|
||||
return ((int32_t *) data)[0];
|
||||
case GGML_TYPE_F16:
|
||||
{
|
||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
||||
} break;
|
||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
return ((float *) data)[0];
|
||||
} break;
|
||||
return ((float *) data)[0];
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
|
||||
return 0.0f;
|
||||
@ -5297,31 +5285,31 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
|
||||
return ((int8_t *)(tensor->data))[i];
|
||||
} break;
|
||||
}
|
||||
case GGML_TYPE_I16:
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
|
||||
return ((int16_t *)(tensor->data))[i];
|
||||
} break;
|
||||
}
|
||||
case GGML_TYPE_I32:
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
|
||||
return ((int32_t *)(tensor->data))[i];
|
||||
} break;
|
||||
}
|
||||
case GGML_TYPE_F16:
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
|
||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
|
||||
} break;
|
||||
}
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
GGML_ASSERT(tensor->nb[0] == sizeof(float));
|
||||
return ((float *)(tensor->data))[i];
|
||||
} break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0f;
|
||||
@ -5371,29 +5359,17 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
|
||||
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
|
||||
switch (tensor->type) {
|
||||
case GGML_TYPE_I8:
|
||||
{
|
||||
return ((int8_t *) data)[0];
|
||||
} break;
|
||||
return ((int8_t *) data)[0];
|
||||
case GGML_TYPE_I16:
|
||||
{
|
||||
return ((int16_t *) data)[0];
|
||||
} break;
|
||||
return ((int16_t *) data)[0];
|
||||
case GGML_TYPE_I32:
|
||||
{
|
||||
return ((int32_t *) data)[0];
|
||||
} break;
|
||||
return ((int32_t *) data)[0];
|
||||
case GGML_TYPE_F16:
|
||||
{
|
||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
||||
} break;
|
||||
return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
return ((float *) data)[0];
|
||||
} break;
|
||||
return ((float *) data)[0];
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
|
||||
return 0.0f;
|
||||
@ -8542,7 +8518,7 @@ static void ggml_compute_forward_dup_f16(
|
||||
return;
|
||||
}
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith; // thread index
|
||||
const int nth = params->nth; // number of threads
|
||||
@ -8813,7 +8789,7 @@ static void ggml_compute_forward_dup_f32(
|
||||
return;
|
||||
}
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith; // thread index
|
||||
const int nth = params->nth; // number of threads
|
||||
@ -9094,7 +9070,7 @@ static void ggml_compute_forward_add_f32(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT( nb0 == sizeof(float));
|
||||
GGML_ASSERT(nb00 == sizeof(float));
|
||||
@ -9167,7 +9143,7 @@ static void ggml_compute_forward_add_f16_f32(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
@ -9221,7 +9197,7 @@ static void ggml_compute_forward_add_f16_f16(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
||||
@ -9272,7 +9248,7 @@ static void ggml_compute_forward_add_q_f32(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -9398,7 +9374,7 @@ static void ggml_compute_forward_add1_f32(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT( nb0 == sizeof(float));
|
||||
GGML_ASSERT(nb00 == sizeof(float));
|
||||
@ -9453,7 +9429,7 @@ static void ggml_compute_forward_add1_f16_f32(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
@ -9503,7 +9479,7 @@ static void ggml_compute_forward_add1_f16_f16(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
||||
@ -9553,7 +9529,7 @@ static void ggml_compute_forward_add1_q_f32(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
const enum ggml_type type = src0->type;
|
||||
ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
|
||||
@ -9681,8 +9657,8 @@ static void ggml_compute_forward_acc_f32(
|
||||
const int nr = ggml_nrows(src1);
|
||||
const int nc = src1->ne[0];
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
|
||||
|
||||
// src0 and dst as viewed during acc
|
||||
const size_t nb0 = ggml_element_size(src0);
|
||||
@ -9771,7 +9747,7 @@ static void ggml_compute_forward_sub_f32(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT( nb0 == sizeof(float));
|
||||
GGML_ASSERT(nb00 == sizeof(float));
|
||||
@ -9861,7 +9837,7 @@ static void ggml_compute_forward_mul_f32(
|
||||
|
||||
const int64_t nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT( nb0 == sizeof(float));
|
||||
GGML_ASSERT(nb00 == sizeof(float));
|
||||
@ -9952,7 +9928,7 @@ static void ggml_compute_forward_div_f32(
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT( nb0 == sizeof(float));
|
||||
GGML_ASSERT(nb00 == sizeof(float));
|
||||
@ -10161,8 +10137,8 @@ static void ggml_compute_forward_sum_f32(
|
||||
assert(ggml_is_scalar(dst));
|
||||
assert(src0->nb[0] == sizeof(float));
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb)
|
||||
|
||||
ggml_float sum = 0;
|
||||
ggml_float row_sum = 0;
|
||||
@ -10193,8 +10169,8 @@ static void ggml_compute_forward_sum_f16(
|
||||
|
||||
assert(src0->nb[0] == sizeof(ggml_fp16_t));
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb)
|
||||
|
||||
float sum = 0;
|
||||
float row_sum = 0;
|
||||
@ -10247,7 +10223,7 @@ static void ggml_compute_forward_sum_rows_f32(
|
||||
GGML_ASSERT(src0->nb[0] == sizeof(float));
|
||||
GGML_ASSERT(dst->nb[0] == sizeof(float));
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT(ne0 == 1);
|
||||
GGML_ASSERT(ne1 == ne01);
|
||||
@ -10297,7 +10273,7 @@ static void ggml_compute_forward_mean_f32(
|
||||
|
||||
assert(src0->nb[0] == sizeof(float));
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
assert(ne0 == 1);
|
||||
assert(ne1 == ne01);
|
||||
@ -10397,7 +10373,7 @@ static void ggml_compute_forward_repeat_f32(
|
||||
return;
|
||||
}
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
// guaranteed to be an integer due to the check in ggml_can_repeat
|
||||
const int nr0 = (int)(ne0/ne00);
|
||||
@ -10508,7 +10484,7 @@ static void ggml_compute_forward_repeat_back_f32(
|
||||
return;
|
||||
}
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
// guaranteed to be an integer due to the check in ggml_can_repeat
|
||||
const int nr0 = (int)(ne00/ne0);
|
||||
@ -10586,7 +10562,7 @@ static void ggml_compute_forward_concat_f32(
|
||||
|
||||
const int ith = params->ith;
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
// TODO: support for transposed / permuted tensors
|
||||
GGML_ASSERT(nb0 == sizeof(float));
|
||||
@ -11188,7 +11164,7 @@ static void ggml_compute_forward_norm_f32(
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
float eps;
|
||||
memcpy(&eps, dst->op_params, sizeof(float));
|
||||
@ -11257,7 +11233,7 @@ static void ggml_compute_forward_rms_norm_f32(
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
float eps;
|
||||
memcpy(&eps, dst->op_params, sizeof(float));
|
||||
@ -11322,7 +11298,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
float eps;
|
||||
memcpy(&eps, dst->op_params, sizeof(float));
|
||||
@ -11497,7 +11473,7 @@ static void ggml_compute_forward_group_norm_f32(
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
const float eps = 1e-6f; // TODO: make this a parameter
|
||||
|
||||
@ -11608,7 +11584,7 @@ static void ggml_compute_forward_mul_mat(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -11826,7 +11802,7 @@ static void ggml_compute_forward_out_prod_f32(
|
||||
// int64_t t0 = ggml_perf_time_us();
|
||||
// UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -12200,8 +12176,8 @@ static void ggml_compute_forward_set_f32(
|
||||
const int nr = ggml_nrows(src1);
|
||||
const int nc = src1->ne[0];
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
|
||||
|
||||
// src0 and dst as viewed during set
|
||||
const size_t nb0 = ggml_element_size(src0);
|
||||
@ -12588,7 +12564,7 @@ static void ggml_compute_forward_diag_f32(
|
||||
|
||||
// TODO: handle transposed/permuted matrices
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT(ne00 == ne0);
|
||||
GGML_ASSERT(ne00 == ne1);
|
||||
@ -13163,7 +13139,7 @@ static void ggml_compute_forward_rope_f32(
|
||||
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
|
||||
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
||||
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
||||
@ -13295,7 +13271,7 @@ static void ggml_compute_forward_rope_f16(
|
||||
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
||||
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
||||
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
||||
@ -13458,7 +13434,7 @@ static void ggml_compute_forward_rope_back_f32(
|
||||
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
|
||||
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
||||
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
||||
@ -13558,7 +13534,7 @@ static void ggml_compute_forward_rope_back_f16(
|
||||
const int n_dims = ((int32_t *) dst->op_params)[1];
|
||||
const int mode = ((int32_t *) dst->op_params)[2];
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
||||
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
||||
@ -13672,7 +13648,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f16_f32(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -13763,7 +13739,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f32(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -13875,7 +13851,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f16_f32(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -13966,7 +13942,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f32(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -14084,7 +14060,7 @@ static void ggml_compute_forward_conv_1d(
|
||||
ggml_compute_forward_conv_1d_s2_ph(params, src0, src1, dst);
|
||||
} else {
|
||||
GGML_ASSERT(false); // only stride 1 and 2 supported
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_conv_2d
|
||||
@ -14101,7 +14077,7 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -14221,7 +14197,7 @@ static void ggml_compute_forward_conv_transpose_2d(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -14480,7 +14456,7 @@ static void ggml_compute_forward_upscale_f32(
|
||||
|
||||
const int ith = params->ith;
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
const int scale_factor = dst->op_params[0];
|
||||
|
||||
@ -14532,14 +14508,14 @@ static void ggml_compute_forward_flash_attn_f32(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -14722,14 +14698,14 @@ static void ggml_compute_forward_flash_attn_f16(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -14974,18 +14950,18 @@ static void ggml_compute_forward_flash_ff_f16(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, nea, a, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nba, a, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, nea, a, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nba, a, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -15133,16 +15109,16 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ned, d, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nbd, d, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
||||
GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, ned, d, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nbd, d, nb)
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@ -15505,8 +15481,8 @@ static void ggml_compute_forward_win_part_f32(
|
||||
return;
|
||||
}
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||
|
||||
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
|
||||
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
|
||||
@ -15567,8 +15543,8 @@ static void ggml_compute_forward_win_unpart_f32(
|
||||
return;
|
||||
}
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
||||
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
|
||||
|
||||
const int32_t w = ((const int32_t *)(dst->op_params))[0];
|
||||
|
||||
@ -15685,7 +15661,7 @@ static void ggml_compute_forward_get_rel_pos_f16(
|
||||
|
||||
// ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L292-L322
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
const int64_t w = ne1;
|
||||
|
||||
@ -19637,7 +19613,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
||||
(*step) *= width;
|
||||
}
|
||||
|
||||
return GGML_LINESEARCH_FAIL;
|
||||
GGML_UNREACHABLE();
|
||||
}
|
||||
|
||||
static enum ggml_opt_result ggml_opt_lbfgs(
|
||||
@ -19904,7 +19880,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
||||
step[0] = 1.0;
|
||||
}
|
||||
|
||||
return GGML_OPT_DID_NOT_CONVERGE;
|
||||
GGML_UNREACHABLE();
|
||||
}
|
||||
|
||||
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
||||
@ -20638,10 +20614,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||
} break;
|
||||
case GGUF_TYPE_ARRAY:
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
|
||||
};
|
||||
}
|
||||
} break;
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||
};
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
break;
|
||||
@ -21369,10 +21345,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
|
||||
} break;
|
||||
case GGUF_TYPE_ARRAY:
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
|
||||
};
|
||||
}
|
||||
} break;
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// write tensor infos
|
||||
|
8
ggml.h
8
ggml.h
@ -248,6 +248,14 @@
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#ifndef NDEBUG
|
||||
#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
|
||||
#elif defined(__GNUC__)
|
||||
#define GGML_UNREACHABLE() __builtin_unreachable()
|
||||
#else
|
||||
#define GGML_UNREACHABLE() ((void) 0)
|
||||
#endif
|
||||
|
||||
// used to copy the number of elements and stride in bytes of tensors into local variables.
|
||||
// main purpose is to reduce code duplication and improve readability.
|
||||
//
|
||||
|
14
llama.cpp
14
llama.cpp
@ -449,7 +449,7 @@ struct LLM_TN {
|
||||
//
|
||||
|
||||
#define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
|
||||
{ \
|
||||
do { \
|
||||
const std::string skey(key); \
|
||||
const int kid = gguf_find_key(ctx, skey.c_str()); \
|
||||
if (kid >= 0) { \
|
||||
@ -461,7 +461,7 @@ struct LLM_TN {
|
||||
} else if (req) { \
|
||||
throw std::runtime_error(format("key not found in model: %s", skey.c_str())); \
|
||||
} \
|
||||
}
|
||||
} while (0)
|
||||
|
||||
//
|
||||
// ggml helpers
|
||||
@ -1913,7 +1913,7 @@ static void llm_load_hparams(
|
||||
}
|
||||
} break;
|
||||
default: (void)0;
|
||||
};
|
||||
}
|
||||
|
||||
model.ftype = ml.ftype;
|
||||
}
|
||||
@ -2438,7 +2438,7 @@ static void llm_load_tensors(
|
||||
} break;
|
||||
default:
|
||||
throw std::runtime_error("unknown architecture");
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
ml.done_getting_tensors();
|
||||
@ -3981,7 +3981,7 @@ static struct ggml_cgraph * llama_build_graph(
|
||||
} break;
|
||||
default:
|
||||
GGML_ASSERT(false);
|
||||
};
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -4626,7 +4626,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
|
||||
llm_tokenizer_bpe tokenizer(vocab);
|
||||
tokenizer.tokenize(raw_text, output);
|
||||
} break;
|
||||
};
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
@ -7520,7 +7520,7 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, ch
|
||||
buf[2] = '\x85';
|
||||
return 3;
|
||||
} else if (llama_is_control_token(model->vocab, token)) {
|
||||
;
|
||||
// do nothing
|
||||
} else if (llama_is_byte_token(model->vocab, token)) {
|
||||
if (length < 1) {
|
||||
return -1;
|
||||
|
@ -43,7 +43,7 @@ static_assert(QK4_1 == QK8_0, "QK4_1 and QK8_0 must be the same");
|
||||
static_assert(QK4_0 == QK8_0, "QK4_0 and QK8_0 must be the same");
|
||||
|
||||
template <typename T>
|
||||
void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
|
||||
static void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
|
||||
for (auto& b : blocks) {
|
||||
b.d = 1;
|
||||
for (int i=0; i<QK4_1/2; ++i) {
|
||||
@ -54,7 +54,7 @@ void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
|
||||
}
|
||||
}
|
||||
|
||||
void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
|
||||
static void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
|
||||
for (auto& b : blocks) {
|
||||
b.d = 1;
|
||||
int sum = 0;
|
||||
@ -66,7 +66,7 @@ void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
|
||||
}
|
||||
}
|
||||
|
||||
float simpleDot(const block_q4_0& x, const block_q8_0& y) {
|
||||
static float simpleDot(const block_q4_0& x, const block_q8_0& y) {
|
||||
int s1 = 0; //, s2 = 0;
|
||||
for (int i=0; i<QK4_1/2; i+=2) {
|
||||
int v1 = x.qs[i+0] & 0xf;
|
||||
@ -81,7 +81,7 @@ float simpleDot(const block_q4_0& x, const block_q8_0& y) {
|
||||
//return y.d * x.d * (s1 - 8 * s2);
|
||||
}
|
||||
|
||||
float simpleDot(const block_q4_1& x, const block_q8_0& y) {
|
||||
static float simpleDot(const block_q4_1& x, const block_q8_0& y) {
|
||||
int s1 = 0; //, s2 = 0;
|
||||
for (int i=0; i<QK4_1/2; i+=2) {
|
||||
int v1 = x.qs[i+0] & 0xf;
|
||||
|
@ -107,7 +107,7 @@ static struct ggml_tensor * get_random_tensor_f32(
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
};
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -155,7 +155,7 @@ static struct ggml_tensor * get_random_tensor_f16(
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
};
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -203,7 +203,7 @@ static struct ggml_tensor * get_random_tensor_i32(
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
};
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -101,7 +101,7 @@ static struct ggml_tensor * get_random_tensor(
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
};
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -124,7 +124,7 @@ int main(void) {
|
||||
struct ggml_context * ctx = ggml_init(params);
|
||||
|
||||
int64_t ne1[4] = {4, 128, 1, 1};
|
||||
int64_t ne2[4] = {4, 256, 1, 1};;
|
||||
int64_t ne2[4] = {4, 256, 1, 1};
|
||||
int64_t ne3[4] = {128, 256, 1, 1};
|
||||
|
||||
struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1);
|
||||
|
Loading…
Reference in New Issue
Block a user