mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 03:12:23 +01:00
7c7836d9d4
* Refactor shaders, extract GLSL code from ggml_vk_generate_shaders.py into vulkan-shaders directory * Improve debug log code * Add memory debug output option * Fix flake8 * Fix unnecessary high llama-3 VRAM use
180 lines
2.9 KiB
Plaintext
180 lines
2.9 KiB
Plaintext
// Require explicit 8-bit integer types whenever the A operand is neither F32
// nor F16. Every quantized block struct declared in this file has uint8_t or
// int8_t members.
#if !defined(DATA_A_F32) && !defined(DATA_A_F16)
|
|
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
|
|
#endif
|
|
|
|
// F32 A operand: QUANT_K and QUANT_R are both 1.
// A_TYPE is selected by LOAD_VEC_A:
//   undefined -> float
//   4         -> vec4
//   8         -> mat2x4 (8 float components)
// NOTE(review): there is no #else branch, so A_TYPE stays undefined when
// LOAD_VEC_A is defined to any other value.
#if defined(DATA_A_F32)
|
|
#define QUANT_K 1
|
|
#define QUANT_R 1
|
|
|
|
#ifndef LOAD_VEC_A
|
|
#define A_TYPE float
|
|
#elif LOAD_VEC_A == 4
|
|
#define A_TYPE vec4
|
|
#elif LOAD_VEC_A == 8
|
|
#define A_TYPE mat2x4
|
|
#endif
|
|
#endif
|
|
|
|
// F16 A operand: QUANT_K and QUANT_R are both 1.
// A_TYPE is selected by LOAD_VEC_A:
//   undefined -> float16_t
//   4         -> f16vec4
//   8         -> f16mat2x4 (8 half-precision components)
// NOTE(review): there is no #else branch, so A_TYPE stays undefined when
// LOAD_VEC_A is defined to any other value.
#if defined(DATA_A_F16)
|
|
#define QUANT_K 1
|
|
#define QUANT_R 1
|
|
|
|
#ifndef LOAD_VEC_A
|
|
#define A_TYPE float16_t
|
|
#elif LOAD_VEC_A == 4
|
|
#define A_TYPE f16vec4
|
|
#elif LOAD_VEC_A == 8
|
|
#define A_TYPE f16mat2x4
|
|
#endif
|
|
#endif
|
|
|
|
// Q4_0 A operand: QUANT_K = 32, QUANT_R = 2, A_TYPE = block_q4_0.
// The struct holds one float16_t `d` followed by 16 bytes of `qs`.
// NOTE(review): presumably `d` is the per-block scale and each `qs` byte packs
// two 4-bit values (32 values in 16 bytes) - confirm against the dequant code.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q4_0)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#define QUANT_K 32
|
|
#define QUANT_R 2
|
|
|
|
struct block_q4_0
|
|
{
|
|
float16_t d;
|
|
uint8_t qs[16];
|
|
};
|
|
|
|
#define A_TYPE block_q4_0
|
|
#endif
|
|
|
|
// Q4_1 A operand: QUANT_K = 32, QUANT_R = 2, A_TYPE = block_q4_1.
// The struct holds two float16_t values (`d`, `m`) followed by 16 bytes of `qs`.
// NOTE(review): presumably `d` is the scale, `m` the minimum/offset, and each
// `qs` byte packs two 4-bit values - confirm against the dequant code.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q4_1)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#define QUANT_K 32
|
|
#define QUANT_R 2
|
|
|
|
struct block_q4_1
|
|
{
|
|
float16_t d;
|
|
float16_t m;
|
|
uint8_t qs[16];
|
|
};
|
|
|
|
#define A_TYPE block_q4_1
|
|
#endif
|
|
|
|
// Q5_0 A operand: QUANT_K = 32, QUANT_R = 2, A_TYPE = block_q5_0.
// Requires explicit 16-bit integer types in addition to 16-bit storage because
// `qh` is declared as uint16_t.
// The struct holds a float16_t `d`, 32 bits of `qh` split into two uint16_t
// elements, and 16 bytes of `qs`.
// NOTE(review): presumably `qh` carries the fifth (high) bit of each of the 32
// values and is split in two to keep 2-byte alignment - confirm.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q5_0)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
|
#define QUANT_K 32
|
|
#define QUANT_R 2
|
|
|
|
struct block_q5_0
|
|
{
|
|
float16_t d;
|
|
uint16_t qh[2];
|
|
uint8_t qs[16];
|
|
};
|
|
|
|
#define A_TYPE block_q5_0
|
|
#endif
|
|
|
|
// Q5_1 A operand: QUANT_K = 32, QUANT_R = 2, A_TYPE = block_q5_1.
// The struct holds two float16_t values (`d`, `m`), one 32-bit uint `qh`, and
// 16 bytes of `qs`.
// NOTE(review): presumably `d` is the scale, `m` the minimum/offset, and `qh`
// the high bit of each of the 32 values - confirm against the dequant code.
// NOTE(review): the int16 extension is required here although this struct
// declares no 16-bit integer member; presumably needed by including shaders.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q5_1)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
|
|
#define QUANT_K 32
|
|
#define QUANT_R 2
|
|
|
|
struct block_q5_1
|
|
{
|
|
float16_t d;
|
|
float16_t m;
|
|
uint qh;
|
|
uint8_t qs[16];
|
|
};
|
|
|
|
#define A_TYPE block_q5_1
|
|
#endif
|
|
|
|
// Q8_0 A operand: QUANT_K = 32, QUANT_R = 1, A_TYPE = block_q8_0.
// The struct holds one float16_t `d` followed by 32 signed bytes of `qs`
// (one int8_t per QUANT_K entry).
// NOTE(review): presumably `d` is the per-block scale - confirm.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q8_0)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#define QUANT_K 32
|
|
#define QUANT_R 1
|
|
|
|
struct block_q8_0
|
|
{
|
|
float16_t d;
|
|
int8_t qs[32];
|
|
};
|
|
|
|
#define A_TYPE block_q8_0
|
|
#endif
|
|
|
|
// K-quants
|
|
// Q2_K A operand: QUANT_K = 256, A_TYPE = block_q2_K. QUANT_R is not defined
// in this section.
// Array sizes are derived from QUANT_K, so they change if QUANT_K changes.
// NOTE(review): presumably `d` packs the super-block scale and minimum as the
// two f16vec2 components - confirm against the dequant code.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q2_K)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#define QUANT_K 256
|
|
|
|
struct block_q2_K
|
|
{
|
|
uint8_t scales[QUANT_K/16]; // 16 bytes when QUANT_K == 256
|
|
uint8_t qs[QUANT_K/4]; // 64 bytes when QUANT_K == 256
|
|
f16vec2 d;
|
|
};
|
|
|
|
#define A_TYPE block_q2_K
|
|
#endif
|
|
|
|
// Q3_K A operand: QUANT_K = 256, A_TYPE = block_q3_K. QUANT_R is not defined
// in this section.
// `hmask` and `qs` are sized from QUANT_K; `scales` is a fixed 12 bytes.
// NOTE(review): presumably `hmask` holds the high bit and `qs` the low two
// bits of each value, with `d` as the super-block scale - confirm.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q3_K)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#define QUANT_K 256
|
|
|
|
struct block_q3_K
|
|
{
|
|
uint8_t hmask[QUANT_K/8]; // 32 bytes when QUANT_K == 256
|
|
uint8_t qs[QUANT_K/4]; // 64 bytes when QUANT_K == 256
|
|
uint8_t scales[12];
|
|
float16_t d;
|
|
};
|
|
|
|
#define A_TYPE block_q3_K
|
|
#endif
|
|
|
|
// Q4_K A operand: QUANT_K = 256, A_TYPE = block_q4_K. QUANT_R is not defined
// in this section.
// Array sizes are derived from QUANT_K, so they change if QUANT_K changes.
// NOTE(review): presumably `d` packs the super-block scale and minimum as the
// two f16vec2 components and `qs` packs two 4-bit values per byte - confirm.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q4_K)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#define QUANT_K 256
|
|
|
|
struct block_q4_K
|
|
{
|
|
f16vec2 d;
|
|
uint8_t scales[3*QUANT_K/64]; // 12 bytes when QUANT_K == 256
|
|
uint8_t qs[QUANT_K/2]; // 128 bytes when QUANT_K == 256
|
|
};
|
|
|
|
#define A_TYPE block_q4_K
|
|
#endif
|
|
|
|
// Q5_K A operand: QUANT_K = 256, A_TYPE = block_q5_K. QUANT_R is not defined
// in this section.
// `qh` and `qs` are sized from QUANT_K; `scales` is a fixed 12 bytes.
// NOTE(review): presumably `d` packs the super-block scale and minimum, `qh`
// holds the high bit and `qs` the low four bits of each value - confirm.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q5_K)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#define QUANT_K 256
|
|
|
|
struct block_q5_K
|
|
{
|
|
f16vec2 d;
|
|
uint8_t scales[12];
|
|
uint8_t qh[QUANT_K/8]; // 32 bytes when QUANT_K == 256
|
|
uint8_t qs[QUANT_K/2]; // 128 bytes when QUANT_K == 256
|
|
};
|
|
|
|
#define A_TYPE block_q5_K
|
|
#endif
|
|
|
|
// Q6_K A operand: QUANT_K = 256, A_TYPE = block_q6_K. QUANT_R is not defined
// in this section.
// All array sizes are derived from QUANT_K. `scales` is signed (int8_t), unlike
// the unsigned scale arrays of the other K-quant structs in this file.
// NOTE(review): presumably `ql` holds the low four bits and `qh` the high two
// bits of each value, with `d` as the super-block scale - confirm.
// Field order defines the buffer layout; do not reorder.
#if defined(DATA_A_Q6_K)
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#define QUANT_K 256
|
|
|
|
struct block_q6_K
|
|
{
|
|
uint8_t ql[QUANT_K/2]; // 128 bytes when QUANT_K == 256
|
|
uint8_t qh[QUANT_K/4]; // 64 bytes when QUANT_K == 256
|
|
int8_t scales[QUANT_K/16]; // 16 bytes when QUANT_K == 256
|
|
float16_t d;
|
|
};
|
|
|
|
#define A_TYPE block_q6_K
|
|
#endif
|