mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-14 06:19:02 +01:00
vulkan: define all quant data structures in types.comp (#10440)
This commit is contained in:
parent
5b3466bedf
commit
c31ed2abfc
@ -30,10 +30,8 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(DATA_A_Q4_0)
|
#define QUANT_K_Q4_0 32
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
#define QUANT_R_Q4_0 2
|
||||||
#define QUANT_K 32
|
|
||||||
#define QUANT_R 2
|
|
||||||
|
|
||||||
struct block_q4_0
|
struct block_q4_0
|
||||||
{
|
{
|
||||||
@ -46,14 +44,15 @@ struct block_q4_0_packed16
|
|||||||
uint16_t qs[16/2];
|
uint16_t qs[16/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q4_0)
|
||||||
|
#define QUANT_K QUANT_K_Q4_0
|
||||||
|
#define QUANT_R QUANT_R_Q4_0
|
||||||
#define A_TYPE block_q4_0
|
#define A_TYPE block_q4_0
|
||||||
#define A_TYPE_PACKED16 block_q4_0_packed16
|
#define A_TYPE_PACKED16 block_q4_0_packed16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(DATA_A_Q4_1)
|
#define QUANT_K_Q4_1 32
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
#define QUANT_R_Q4_1 2
|
||||||
#define QUANT_K 32
|
|
||||||
#define QUANT_R 2
|
|
||||||
|
|
||||||
struct block_q4_1
|
struct block_q4_1
|
||||||
{
|
{
|
||||||
@ -69,15 +68,15 @@ struct block_q4_1_packed16
|
|||||||
uint16_t qs[16/2];
|
uint16_t qs[16/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q4_1)
|
||||||
|
#define QUANT_K QUANT_K_Q4_1
|
||||||
|
#define QUANT_R QUANT_R_Q4_1
|
||||||
#define A_TYPE block_q4_1
|
#define A_TYPE block_q4_1
|
||||||
#define A_TYPE_PACKED16 block_q4_1_packed16
|
#define A_TYPE_PACKED16 block_q4_1_packed16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(DATA_A_Q5_0)
|
#define QUANT_K_Q5_0 32
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
#define QUANT_R_Q5_0 2
|
||||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
|
||||||
#define QUANT_K 32
|
|
||||||
#define QUANT_R 2
|
|
||||||
|
|
||||||
struct block_q5_0
|
struct block_q5_0
|
||||||
{
|
{
|
||||||
@ -93,15 +92,15 @@ struct block_q5_0_packed16
|
|||||||
uint16_t qs[16/2];
|
uint16_t qs[16/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q5_0)
|
||||||
|
#define QUANT_K QUANT_K_Q5_0
|
||||||
|
#define QUANT_R QUANT_R_Q5_0
|
||||||
#define A_TYPE block_q5_0
|
#define A_TYPE block_q5_0
|
||||||
#define A_TYPE_PACKED16 block_q5_0_packed16
|
#define A_TYPE_PACKED16 block_q5_0_packed16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(DATA_A_Q5_1)
|
#define QUANT_K_Q5_1 32
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
#define QUANT_R_Q5_1 2
|
||||||
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
|
||||||
#define QUANT_K 32
|
|
||||||
#define QUANT_R 2
|
|
||||||
|
|
||||||
struct block_q5_1
|
struct block_q5_1
|
||||||
{
|
{
|
||||||
@ -119,14 +118,15 @@ struct block_q5_1_packed16
|
|||||||
uint16_t qs[16/2];
|
uint16_t qs[16/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q5_1)
|
||||||
|
#define QUANT_K QUANT_K_Q5_1
|
||||||
|
#define QUANT_R QUANT_R_Q5_1
|
||||||
#define A_TYPE block_q5_1
|
#define A_TYPE block_q5_1
|
||||||
#define A_TYPE_PACKED16 block_q5_1_packed16
|
#define A_TYPE_PACKED16 block_q5_1_packed16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(DATA_A_Q8_0)
|
#define QUANT_K_Q8_0 32
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
#define QUANT_R_Q8_0 1
|
||||||
#define QUANT_K 32
|
|
||||||
#define QUANT_R 1
|
|
||||||
|
|
||||||
struct block_q8_0
|
struct block_q8_0
|
||||||
{
|
{
|
||||||
@ -139,164 +139,164 @@ struct block_q8_0_packed16
|
|||||||
uint16_t qs[32/2];
|
uint16_t qs[32/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q8_0)
|
||||||
|
#define QUANT_K QUANT_K_Q8_0
|
||||||
|
#define QUANT_R QUANT_R_Q8_0
|
||||||
#define A_TYPE block_q8_0
|
#define A_TYPE block_q8_0
|
||||||
#define A_TYPE_PACKED16 block_q8_0_packed16
|
#define A_TYPE_PACKED16 block_q8_0_packed16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// K-quants
|
// K-quants
|
||||||
#if defined(DATA_A_Q2_K)
|
#define QUANT_K_Q2_K 256
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
|
||||||
#define QUANT_K 256
|
|
||||||
|
|
||||||
struct block_q2_K
|
struct block_q2_K
|
||||||
{
|
{
|
||||||
uint8_t scales[QUANT_K/16];
|
uint8_t scales[QUANT_K_Q2_K/16];
|
||||||
uint8_t qs[QUANT_K/4];
|
uint8_t qs[QUANT_K_Q2_K/4];
|
||||||
f16vec2 d;
|
f16vec2 d;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block_q2_K_packed16
|
struct block_q2_K_packed16
|
||||||
{
|
{
|
||||||
uint16_t scales[QUANT_K/16/2];
|
uint16_t scales[QUANT_K_Q2_K/16/2];
|
||||||
uint16_t qs[QUANT_K/4/2];
|
uint16_t qs[QUANT_K_Q2_K/4/2];
|
||||||
f16vec2 d;
|
f16vec2 d;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block_q2_K_packed32
|
struct block_q2_K_packed32
|
||||||
{
|
{
|
||||||
uint32_t scales[QUANT_K/16/4];
|
uint32_t scales[QUANT_K_Q2_K/16/4];
|
||||||
uint32_t qs[QUANT_K/4/4];
|
uint32_t qs[QUANT_K_Q2_K/4/4];
|
||||||
f16vec2 d;
|
f16vec2 d;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q2_K)
|
||||||
|
#define QUANT_K QUANT_K_Q2_K
|
||||||
#define A_TYPE block_q2_K
|
#define A_TYPE block_q2_K
|
||||||
#define A_TYPE_PACKED16 block_q2_K_packed16
|
#define A_TYPE_PACKED16 block_q2_K_packed16
|
||||||
#define A_TYPE_PACKED32 block_q2_K_packed32
|
#define A_TYPE_PACKED32 block_q2_K_packed32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(DATA_A_Q3_K)
|
#define QUANT_K_Q3_K 256
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
|
||||||
#define QUANT_K 256
|
|
||||||
|
|
||||||
struct block_q3_K
|
struct block_q3_K
|
||||||
{
|
{
|
||||||
uint8_t hmask[QUANT_K/8];
|
uint8_t hmask[QUANT_K_Q3_K/8];
|
||||||
uint8_t qs[QUANT_K/4];
|
uint8_t qs[QUANT_K_Q3_K/4];
|
||||||
uint8_t scales[12];
|
uint8_t scales[12];
|
||||||
float16_t d;
|
float16_t d;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block_q3_K_packed16
|
struct block_q3_K_packed16
|
||||||
{
|
{
|
||||||
uint16_t hmask[QUANT_K/8/2];
|
uint16_t hmask[QUANT_K_Q3_K/8/2];
|
||||||
uint16_t qs[QUANT_K/4/2];
|
uint16_t qs[QUANT_K_Q3_K/4/2];
|
||||||
uint16_t scales[12/2];
|
uint16_t scales[12/2];
|
||||||
float16_t d;
|
float16_t d;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q3_K)
|
||||||
|
#define QUANT_K QUANT_K_Q3_K
|
||||||
#define A_TYPE block_q3_K
|
#define A_TYPE block_q3_K
|
||||||
#define A_TYPE_PACKED16 block_q3_K_packed16
|
#define A_TYPE_PACKED16 block_q3_K_packed16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(DATA_A_Q4_K)
|
#define QUANT_K_Q4_K 256
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
|
||||||
#define QUANT_K 256
|
|
||||||
|
|
||||||
struct block_q4_K
|
struct block_q4_K
|
||||||
{
|
{
|
||||||
f16vec2 d;
|
f16vec2 d;
|
||||||
uint8_t scales[3*QUANT_K/64];
|
uint8_t scales[3*QUANT_K_Q4_K/64];
|
||||||
uint8_t qs[QUANT_K/2];
|
uint8_t qs[QUANT_K_Q4_K/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block_q4_K_packed16
|
struct block_q4_K_packed16
|
||||||
{
|
{
|
||||||
f16vec2 d;
|
f16vec2 d;
|
||||||
uint16_t scales[3*QUANT_K/64/2];
|
uint16_t scales[3*QUANT_K_Q4_K/64/2];
|
||||||
uint16_t qs[QUANT_K/2/2];
|
uint16_t qs[QUANT_K_Q4_K/2/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block_q4_K_packed32
|
struct block_q4_K_packed32
|
||||||
{
|
{
|
||||||
f16vec2 d;
|
f16vec2 d;
|
||||||
uint32_t scales[3*QUANT_K/64/4];
|
uint32_t scales[3*QUANT_K_Q4_K/64/4];
|
||||||
uint32_t qs[QUANT_K/2/4];
|
uint32_t qs[QUANT_K_Q4_K/2/4];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q4_K)
|
||||||
|
#define QUANT_K QUANT_K_Q4_K
|
||||||
#define A_TYPE block_q4_K
|
#define A_TYPE block_q4_K
|
||||||
#define A_TYPE_PACKED16 block_q4_K_packed16
|
#define A_TYPE_PACKED16 block_q4_K_packed16
|
||||||
#define A_TYPE_PACKED32 block_q4_K_packed32
|
#define A_TYPE_PACKED32 block_q4_K_packed32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(DATA_A_Q5_K)
|
#define QUANT_K_Q5_K 256
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
|
||||||
#define QUANT_K 256
|
|
||||||
|
|
||||||
struct block_q5_K
|
struct block_q5_K
|
||||||
{
|
{
|
||||||
f16vec2 d;
|
f16vec2 d;
|
||||||
uint8_t scales[12];
|
uint8_t scales[12];
|
||||||
uint8_t qh[QUANT_K/8];
|
uint8_t qh[QUANT_K_Q5_K/8];
|
||||||
uint8_t qs[QUANT_K/2];
|
uint8_t qs[QUANT_K_Q5_K/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block_q5_K_packed16
|
struct block_q5_K_packed16
|
||||||
{
|
{
|
||||||
f16vec2 d;
|
f16vec2 d;
|
||||||
uint16_t scales[12/2];
|
uint16_t scales[12/2];
|
||||||
uint16_t qh[QUANT_K/8/2];
|
uint16_t qh[QUANT_K_Q5_K/8/2];
|
||||||
uint16_t qs[QUANT_K/2/2];
|
uint16_t qs[QUANT_K_Q5_K/2/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q5_K)
|
||||||
|
#define QUANT_K QUANT_K_Q5_K
|
||||||
#define A_TYPE block_q5_K
|
#define A_TYPE block_q5_K
|
||||||
#define A_TYPE_PACKED16 block_q5_K_packed16
|
#define A_TYPE_PACKED16 block_q5_K_packed16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(DATA_A_Q6_K)
|
#define QUANT_K_Q6_K 256
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
|
||||||
#define QUANT_K 256
|
|
||||||
|
|
||||||
struct block_q6_K
|
struct block_q6_K
|
||||||
{
|
{
|
||||||
uint8_t ql[QUANT_K/2];
|
uint8_t ql[QUANT_K_Q6_K/2];
|
||||||
uint8_t qh[QUANT_K/4];
|
uint8_t qh[QUANT_K_Q6_K/4];
|
||||||
int8_t scales[QUANT_K/16];
|
int8_t scales[QUANT_K_Q6_K/16];
|
||||||
float16_t d;
|
float16_t d;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block_q6_K_packed16
|
struct block_q6_K_packed16
|
||||||
{
|
{
|
||||||
uint16_t ql[QUANT_K/2/2];
|
uint16_t ql[QUANT_K_Q6_K/2/2];
|
||||||
uint16_t qh[QUANT_K/4/2];
|
uint16_t qh[QUANT_K_Q6_K/4/2];
|
||||||
int8_t scales[QUANT_K/16];
|
int8_t scales[QUANT_K_Q6_K/16];
|
||||||
float16_t d;
|
float16_t d;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(DATA_A_Q6_K)
|
||||||
|
#define QUANT_K QUANT_K_Q6_K
|
||||||
#define A_TYPE block_q6_K
|
#define A_TYPE block_q6_K
|
||||||
#define A_TYPE_PACKED16 block_q6_K_packed16
|
#define A_TYPE_PACKED16 block_q6_K_packed16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// IQuants
|
// IQuants
|
||||||
|
|
||||||
#if defined(DATA_A_IQ4_NL)
|
#define QUANT_K_IQ4_NL 32
|
||||||
#extension GL_EXT_shader_16bit_storage : require
|
#define QUANT_R_IQ4_NL 2
|
||||||
#define QUANT_K 32
|
|
||||||
#define QUANT_R 2
|
|
||||||
|
|
||||||
struct block_iq4_nl
|
struct block_iq4_nl
|
||||||
{
|
{
|
||||||
float16_t d;
|
float16_t d;
|
||||||
uint8_t qs[QUANT_K/2];
|
uint8_t qs[QUANT_K_IQ4_NL/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block_iq4_nl_packed16
|
struct block_iq4_nl_packed16
|
||||||
{
|
{
|
||||||
float16_t d;
|
float16_t d;
|
||||||
uint16_t qs[QUANT_K/2/2];
|
uint16_t qs[QUANT_K_IQ4_NL/2/2];
|
||||||
};
|
};
|
||||||
|
|
||||||
#define A_TYPE block_iq4_nl
|
#if defined(DATA_A_IQ4_NL)
|
||||||
#define A_TYPE_PACKED16 block_iq4_nl_packed16
|
|
||||||
|
|
||||||
const int8_t kvalues_iq4nl_const[16] = {
|
const int8_t kvalues_iq4nl_const[16] = {
|
||||||
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
|
int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
|
||||||
@ -313,6 +313,11 @@ void init_iq4nl_shmem()
|
|||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define QUANT_K QUANT_K_IQ4_NL
|
||||||
|
#define QUANT_R QUANT_R_IQ4_NL
|
||||||
|
#define A_TYPE block_iq4_nl
|
||||||
|
#define A_TYPE_PACKED16 block_iq4_nl_packed16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // !defined(GGML_TYPES_COMP)
|
#endif // !defined(GGML_TYPES_COMP)
|
||||||
|
Loading…
Reference in New Issue
Block a user