mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-23 18:09:18 +01:00
7c7836d9d4
* Refactor shaders, extract GLSL code from ggml_vk_generate_shaders.py into vulkan-shaders directory * Improve debug log code * Add memory debug output option * Fix flake8 * Fix unnecessary high llama-3 VRAM use
82 lines
1.7 KiB
Plaintext
82 lines
1.7 KiB
Plaintext
#extension GL_EXT_control_flow_attributes : enable
|
|
#extension GL_EXT_shader_16bit_storage : require
|
|
#extension GL_EXT_shader_8bit_storage : require
|
|
|
|
#define K_QUANTS_PER_ITERATION 2
|
|
|
|
#ifdef MUL_MAT_ID
|
|
#define EXPERT_COUNT 8
|
|
#endif
|
|
|
|
#include "types.comp"
|
|
|
|
layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
|
|
layout (binding = 1) readonly buffer B {B_TYPE data_b[];};
|
|
layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
|
|
#ifdef MUL_MAT_ID
|
|
layout (binding = 3) readonly buffer IDS {int data_ids[];};
|
|
#endif
|
|
|
|
#include "dequant_funcs.comp"
|
|
|
|
layout (push_constant) uniform parameter
|
|
{
|
|
uint ncols;
|
|
uint stride_a;
|
|
uint stride_b;
|
|
uint stride_d;
|
|
|
|
uint batch_stride_a;
|
|
uint batch_stride_b;
|
|
uint batch_stride_d;
|
|
|
|
#ifdef MUL_MAT_ID
|
|
uint nei0;
|
|
uint ne11;
|
|
#else
|
|
uint ne02;
|
|
uint ne12;
|
|
uint broadcast2;
|
|
uint broadcast3;
|
|
#endif
|
|
} p;
|
|
|
|
void get_offsets(out uint a_offset, out uint b_offset, out uint d_offset) {
|
|
#ifdef MUL_MAT_ID
|
|
const uint expert_idx = gl_GlobalInvocationID.y;
|
|
#else
|
|
const uint batch_idx = gl_GlobalInvocationID.y;
|
|
#endif
|
|
|
|
#ifndef MUL_MAT_ID
|
|
const uint i13 = batch_idx / p.ne12;
|
|
const uint i12 = batch_idx % p.ne12;
|
|
|
|
const uint i03 = i13 / p.broadcast3;
|
|
const uint i02 = i12 / p.broadcast2;
|
|
|
|
const uint batch_idx_a = i03 * p.ne02 + i02;
|
|
#else
|
|
const uint expert_id = data_ids[expert_idx];
|
|
#endif
|
|
|
|
a_offset =
|
|
#ifdef MUL_MAT_ID
|
|
expert_id * p.batch_stride_a;
|
|
#else
|
|
batch_idx_a * p.batch_stride_a;
|
|
#endif
|
|
b_offset =
|
|
#ifdef MUL_MAT_ID
|
|
(expert_idx % p.ne11) * p.stride_b;
|
|
#else
|
|
batch_idx * p.batch_stride_b;
|
|
#endif
|
|
d_offset =
|
|
#ifdef MUL_MAT_ID
|
|
expert_idx * p.stride_d;
|
|
#else
|
|
batch_idx * p.batch_stride_d;
|
|
#endif
|
|
}
|