mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-28 15:18:26 +01:00
ae8de6d50a
* ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com>
20 lines
642 B
C
20 lines
642 B
C
#pragma once
|
|
|
|
#include "ggml.h"
|
|
|
|
// GGML internal header
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
|
|
size_t quantize_q4_0_4x4(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
size_t quantize_q4_0_4x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
size_t quantize_q4_0_8x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|