Mirror of https://github.com/ggerganov/llama.cpp.git
Synced 2025-01-25 19:08:44 +01:00
180 lines, 2.9 KiB, Plaintext
|
// Require explicit 8-bit integer types unless the A matrix is plain F32 or
// F16. The block structs declared in this file use uint8_t / int8_t members.
#if !(defined(DATA_A_F32) || defined(DATA_A_F16))
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#endif
|
||
|
|
||
|
// Configuration selected by DATA_A_F32: QUANT_K and QUANT_R are both 1 and
// A_TYPE is a float-based type.
#ifdef DATA_A_F32
#define QUANT_K 1
#define QUANT_R 1

// A_TYPE depends on LOAD_VEC_A: scalar float when it is not defined, vec4 for
// 4, mat2x4 (eight floats) for 8. Any other value leaves A_TYPE undefined.
#if !defined(LOAD_VEC_A)
#define A_TYPE float
#elif LOAD_VEC_A == 4
#define A_TYPE vec4
#elif LOAD_VEC_A == 8
#define A_TYPE mat2x4
#endif
#endif // DATA_A_F32
|
||
|
|
||
|
// Configuration selected by DATA_A_F16: QUANT_K and QUANT_R are both 1 and
// A_TYPE is a half-precision type.
#ifdef DATA_A_F16
#define QUANT_K 1
#define QUANT_R 1

// A_TYPE depends on LOAD_VEC_A: scalar float16_t when it is not defined,
// f16vec4 for 4, f16mat2x4 (eight halves) for 8. Any other value leaves
// A_TYPE undefined.
#if !defined(LOAD_VEC_A)
#define A_TYPE float16_t
#elif LOAD_VEC_A == 4
#define A_TYPE f16vec4
#elif LOAD_VEC_A == 8
#define A_TYPE f16mat2x4
#endif
#endif // DATA_A_F16
|
||
|
|
||
|
// Configuration selected by DATA_A_Q4_0: declares block_q4_0 and makes it the
// A_TYPE. QUANT_K is 32 and QUANT_R is 2.
#ifdef DATA_A_Q4_0
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 32
#define QUANT_R 2

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q4_0 {
    float16_t d;      // one half-precision value per block (presumably the scale - confirm)
    uint8_t   qs[16]; // 16 bytes, i.e. QUANT_K / QUANT_R (presumably packed 4-bit quants - confirm)
};

#define A_TYPE block_q4_0
#endif // DATA_A_Q4_0
|
||
|
|
||
|
// Configuration selected by DATA_A_Q4_1: declares block_q4_1 and makes it the
// A_TYPE. QUANT_K is 32 and QUANT_R is 2.
#ifdef DATA_A_Q4_1
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 32
#define QUANT_R 2

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q4_1 {
    float16_t d;      // first half-precision value (presumably the scale - confirm)
    float16_t m;      // second half-precision value (presumably the minimum/offset - confirm)
    uint8_t   qs[16]; // 16 bytes, i.e. QUANT_K / QUANT_R (presumably packed 4-bit quants - confirm)
};

#define A_TYPE block_q4_1
#endif // DATA_A_Q4_1
|
||
|
|
||
|
// Configuration selected by DATA_A_Q5_0: declares block_q5_0 and makes it the
// A_TYPE. QUANT_K is 32 and QUANT_R is 2. The explicit int16 extension is
// required for the uint16_t member below.
#ifdef DATA_A_Q5_0
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#define QUANT_K 32
#define QUANT_R 2

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q5_0 {
    float16_t d;      // one half-precision value per block (presumably the scale - confirm)
    uint16_t  qh[2];  // two 16-bit words (presumably the high bits of the quants - confirm)
    uint8_t   qs[16]; // 16 bytes, i.e. QUANT_K / QUANT_R (presumably the low 4 bits - confirm)
};

#define A_TYPE block_q5_0
#endif // DATA_A_Q5_0
|
||
|
|
||
|
// Configuration selected by DATA_A_Q5_1: declares block_q5_1 and makes it the
// A_TYPE. QUANT_K is 32 and QUANT_R is 2.
// NOTE(review): the struct below has no 16-bit integer member; the int16
// extension is presumably needed by the shaders that include this - confirm.
#ifdef DATA_A_Q5_1
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#define QUANT_K 32
#define QUANT_R 2

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q5_1 {
    float16_t d;      // first half-precision value (presumably the scale - confirm)
    float16_t m;      // second half-precision value (presumably the minimum/offset - confirm)
    uint      qh;     // one 32-bit word (presumably the high bits of the quants - confirm)
    uint8_t   qs[16]; // 16 bytes, i.e. QUANT_K / QUANT_R (presumably the low 4 bits - confirm)
};

#define A_TYPE block_q5_1
#endif // DATA_A_Q5_1
|
||
|
|
||
|
// Configuration selected by DATA_A_Q8_0: declares block_q8_0 and makes it the
// A_TYPE. QUANT_K is 32 and QUANT_R is 1.
#ifdef DATA_A_Q8_0
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 32
#define QUANT_R 1

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q8_0 {
    float16_t d;      // one half-precision value per block (presumably the scale - confirm)
    int8_t    qs[32]; // 32 signed bytes, i.e. QUANT_K / QUANT_R
};

#define A_TYPE block_q8_0
#endif // DATA_A_Q8_0
|
||
|
|
||
|
// K-quants
|
||
|
// Configuration selected by DATA_A_Q2_K: declares block_q2_K and makes it the
// A_TYPE. QUANT_K is 256; QUANT_R is not defined for this format.
#ifdef DATA_A_Q2_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q2_K {
    uint8_t scales[QUANT_K/16]; // 16 bytes
    uint8_t qs[QUANT_K/4];      // 64 bytes (presumably packed 2-bit quants - confirm)
    f16vec2 d;                  // two half-precision values (presumably scale and min - confirm)
};

#define A_TYPE block_q2_K
#endif // DATA_A_Q2_K
|
||
|
|
||
|
// Configuration selected by DATA_A_Q3_K: declares block_q3_K and makes it the
// A_TYPE. QUANT_K is 256; QUANT_R is not defined for this format.
#ifdef DATA_A_Q3_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q3_K {
    uint8_t   hmask[QUANT_K/8]; // 32 bytes (presumably one high bit per quant - confirm)
    uint8_t   qs[QUANT_K/4];    // 64 bytes (presumably the low 2 bits per quant - confirm)
    uint8_t   scales[12];       // 12 bytes of scale data
    float16_t d;                // one half-precision value per block (presumably the scale - confirm)
};

#define A_TYPE block_q3_K
#endif // DATA_A_Q3_K
|
||
|
|
||
|
// Configuration selected by DATA_A_Q4_K: declares block_q4_K and makes it the
// A_TYPE. QUANT_K is 256; QUANT_R is not defined for this format.
#ifdef DATA_A_Q4_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q4_K {
    f16vec2 d;                    // two half-precision values (presumably scale and min - confirm)
    uint8_t scales[3*QUANT_K/64]; // 12 bytes
    uint8_t qs[QUANT_K/2];        // 128 bytes (presumably packed 4-bit quants - confirm)
};

#define A_TYPE block_q4_K
#endif // DATA_A_Q4_K
|
||
|
|
||
|
// Configuration selected by DATA_A_Q5_K: declares block_q5_K and makes it the
// A_TYPE. QUANT_K is 256; QUANT_R is not defined for this format.
#ifdef DATA_A_Q5_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q5_K {
    f16vec2 d;               // two half-precision values (presumably scale and min - confirm)
    uint8_t scales[12];      // 12 bytes of scale data
    uint8_t qh[QUANT_K/8];   // 32 bytes (presumably one high bit per quant - confirm)
    uint8_t qs[QUANT_K/2];   // 128 bytes (presumably the low 4 bits per quant - confirm)
};

#define A_TYPE block_q5_K
#endif // DATA_A_Q5_K
|
||
|
|
||
|
// Configuration selected by DATA_A_Q6_K: declares block_q6_K and makes it the
// A_TYPE. QUANT_K is 256; QUANT_R is not defined for this format.
#ifdef DATA_A_Q6_K
#extension GL_EXT_shader_16bit_storage : require
#define QUANT_K 256

// NOTE(review): member order and sizes presumably mirror a host-side struct of
// the same name - confirm before changing the layout.
struct block_q6_K {
    uint8_t   ql[QUANT_K/2];      // 128 bytes (presumably the low 4 bits per quant - confirm)
    uint8_t   qh[QUANT_K/4];      // 64 bytes (presumably the high 2 bits per quant - confirm)
    int8_t    scales[QUANT_K/16]; // 16 signed bytes
    float16_t d;                  // one half-precision value per block (presumably the scale - confirm)
};

#define A_TYPE block_q6_K
#endif // DATA_A_Q6_K
|