diff --git a/CMakeLists.txt b/CMakeLists.txt
index 88585fb93..31532df91 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -429,7 +429,7 @@ if (LLAMA_KOMPUTE)
         set(spv_file ${source}.spv)
         add_custom_command(
             OUTPUT ${spv_file}
-            DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
+            DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source} ${CMAKE_CURRENT_SOURCE_DIR}/kompute/common.comp
             COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
             COMMENT "Compiling ${source} to ${source}.spv"
         )
diff --git a/kompute/common.comp b/kompute/common.comp
new file mode 100644
index 000000000..12fc7d8b5
--- /dev/null
+++ b/kompute/common.comp
@@ -0,0 +1,124 @@
+/**
+ * Copyright (c) 2023 Nomic, Inc. All rights reserved.
+ *
+ * This software is licensed under the terms of the Software for Open Models License (SOM),
+ * version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
+ * this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
+ */
+
+#extension GL_EXT_shader_16bit_storage: require
+#extension GL_EXT_shader_8bit_storage: require
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
+#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
+#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
+#extension GL_EXT_control_flow_attributes: enable
+
+#define QK4_0 32
+#define QR4_0 2
+#define QK4_1 32
+
+#define GELU_COEF_A 0.044715
+#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
+
+#ifndef QK_K
+#define QK_K 256
+#endif
+
+#if QK_K == 256
+#define K_SCALE_SIZE 12
+#else
+#define K_SCALE_SIZE 4
+#endif
+
+#define BM 128
+#define BN 128
+#define BK 8
+#define TM 8
+#define TN 8
+
+#define u8BufToU16(buf, idx) (((uint16_t(buf[(idx) + 1]) << 8)) | buf[(idx)]) // little-endian: byte idx is the low byte; idx parenthesized so expression arguments index correctly
+#define u8BufToFloat16(buf, idx) uint16BitsToHalf(u8BufToU16(buf, idx)) // reinterpret those 16 bits as float16_t; explicit call parentheses
+#define u8BufToU32(buf, idx) (((uint32_t(u8BufToU16(buf, (idx) + 2)) << 8 | buf[(idx) + 1]) << 8) | buf[(idx)]) // upper 16 bits from bytes idx+2..idx+3, then bytes idx+1 and idx; note byte idx+2 is not used here
+#define u8BufToFloat(buf, idx) uintBitsToFloat(u8BufToU32(buf, idx)) // reinterpret those 32 bits as float; explicit call parentheses
+
+#define sizeof_block_q4_0 0x12 // 18 = 2 (d) + QK4_0/2 (qs): summed field sizes of block_q4_0
+#define sizeof_block_q4_1 0x14 // 20 = 2 (d) + 2 (m) + QK4_1/2 (qs): summed field sizes of block_q4_1
+struct block_q4_0 {
+    float16_t d; // 16-bit float; presumably the per-block scale - confirm against the shaders that read it
+    uint8_t qs[QK4_0 / 2]; // 16 bytes for QK4_0 == 32; presumably two 4-bit quants per byte
+};
+struct block_q4_1 {
+    float16_t d; // 16-bit float; presumably the per-block scale - confirm against the shaders that read it
+    float16_t m; // 16-bit float; presumably the per-block minimum/offset - confirm
+    uint8_t qs[QK4_1 / 2]; // 16 bytes for QK4_1 == 32; presumably two 4-bit quants per byte
+};
+
+#ifndef QK_K
+#define QK_K 256
+#endif
+
+#if QK_K == 256
+#define K_SCALE_SIZE 12
+#else
+#define K_SCALE_SIZE 4
+#endif
+
+struct block_q2_K {
+    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
+    uint8_t qs[QK_K/4];      // quants
+    float16_t d;           // super-block scale for quantized scales
+    float16_t dmin;        // super-block scale for quantized mins
+};
+// 84 bytes / block when QK_K == 256 (16 + 64 + 2 + 2, summed field sizes)
+
+struct block_q3_K {
+    uint8_t hmask[QK_K/8];     // quants - high bit
+    uint8_t qs[QK_K/4];        // quants - low 2 bits
+#if QK_K == 64
+    uint8_t scales[2];         // QK_K == 64 uses a fixed 2-byte scale array instead of K_SCALE_SIZE
+#else
+    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
+#endif
+    float16_t d;             // super-block scale
+}; // 110 bytes / block when QK_K == 256 (32 + 64 + 12 + 2, summed field sizes)
+
+#if QK_K == 64
+struct block_q4_K {             // named struct: GLSL reserves `typedef` and has no typedef facility
+    float16_t    d[2];          // super-block scales/mins
+    uint8_t scales[2];
+    uint8_t qs[QK_K/2];    // 4-bit quants
+};
+#else
+struct block_q4_K {
+    float16_t d;             // super-block scale for quantized scales
+    float16_t dmin;          // super-block scale for quantized mins
+    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
+    uint8_t qs[QK_K/2];        // 4-bit quants
+}; // 144 bytes / block when QK_K == 256 (2 + 2 + 12 + 128, summed field sizes)
+#endif
+
+#if QK_K == 64
+struct block_q5_K {
+    float16_t  d;                     // super-block scales/mins
+    int8_t  scales[QK_K/16];     // 8-bit block scales
+    uint8_t qh[QK_K/8];          // quants, high bit
+    uint8_t qs[QK_K/2];          // quants, low 4 bits
+};
+#else
+struct block_q5_K {
+    float16_t d;                      // super-block scale for quantized scales
+    float16_t dmin;                   // super-block scale for quantized mins
+    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
+    uint8_t qh[QK_K/8];          // quants, high bit
+    uint8_t qs[QK_K/2];          // quants, low 4 bits
+};
+// 176 bytes / block when QK_K == 256 (2 + 2 + 12 + 32 + 128, summed field sizes)
+#endif
+
+struct block_q6_K {
+    uint8_t ql[QK_K/2];      // quants, lower 4 bits
+    uint8_t qh[QK_K/4];      // quants, upper 2 bits
+    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
+    float16_t d;                  // super-block scale
+};
+// 210 bytes / block when QK_K == 256 (128 + 64 + 16 + 2, summed field sizes)
diff --git a/kompute/op_add.comp b/kompute/op_add.comp
index 7e4e43d75..019a68449 100644
--- a/kompute/op_add.comp
+++ b/kompute/op_add.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_addrow.comp b/kompute/op_addrow.comp
index 492f672e5..926c929e4 100644
--- a/kompute/op_addrow.comp
+++ b/kompute/op_addrow.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_cpy_f16_f16.comp b/kompute/op_cpy_f16_f16.comp
index 40d756ae5..5f425ae28 100644
--- a/kompute/op_cpy_f16_f16.comp
+++ b/kompute/op_cpy_f16_f16.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 #define nth 32
 #define IN_TYPE float16_t
diff --git a/kompute/op_cpy_f16_f32.comp b/kompute/op_cpy_f16_f32.comp
index 309c48aed..4298bebdd 100644
--- a/kompute/op_cpy_f16_f32.comp
+++ b/kompute/op_cpy_f16_f32.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 #define nth 32
 #define IN_TYPE float16_t
diff --git a/kompute/op_cpy_f32_f16.comp b/kompute/op_cpy_f32_f16.comp
index fb0e00d67..2d763edfd 100644
--- a/kompute/op_cpy_f32_f16.comp
+++ b/kompute/op_cpy_f32_f16.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 #define nth 32
 #define IN_TYPE float
diff --git a/kompute/op_cpy_f32_f32.comp b/kompute/op_cpy_f32_f32.comp
index f43480b8d..4e5b1d393 100644
--- a/kompute/op_cpy_f32_f32.comp
+++ b/kompute/op_cpy_f32_f32.comp
@@ -1,121 +1,6 @@
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 #define nth 32
 #define IN_TYPE float
diff --git a/kompute/op_diagmask.comp b/kompute/op_diagmask.comp
index 18b0192d7..8dc2cc60a 100644
--- a/kompute/op_diagmask.comp
+++ b/kompute/op_diagmask.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_gelu.comp b/kompute/op_gelu.comp
index 8079b8ef2..c9f8ce3cf 100644
--- a/kompute/op_gelu.comp
+++ b/kompute/op_gelu.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_getrows_f16.comp b/kompute/op_getrows_f16.comp
index e0f5bb16e..17b478b5e 100644
--- a/kompute/op_getrows_f16.comp
+++ b/kompute/op_getrows_f16.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_getrows_q4_0.comp b/kompute/op_getrows_q4_0.comp
index cddba929b..590f218e6 100644
--- a/kompute/op_getrows_q4_0.comp
+++ b/kompute/op_getrows_q4_0.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_getrows_q4_1.comp b/kompute/op_getrows_q4_1.comp
index 151848a9d..44718c6af 100644
--- a/kompute/op_getrows_q4_1.comp
+++ b/kompute/op_getrows_q4_1.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_mul.comp b/kompute/op_mul.comp
index 4907015d8..348eae7b3 100644
--- a/kompute/op_mul.comp
+++ b/kompute/op_mul.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_mul_mat_f16.comp b/kompute/op_mul_mat_f16.comp
index f1198b593..1390c00cf 100644
--- a/kompute/op_mul_mat_f16.comp
+++ b/kompute/op_mul_mat_f16.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 64) in;
 
diff --git a/kompute/op_mul_mat_q4_0.comp b/kompute/op_mul_mat_q4_0.comp
index 206aea7d5..9b6dd72dc 100644
--- a/kompute/op_mul_mat_q4_0.comp
+++ b/kompute/op_mul_mat_q4_0.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 8, local_size_y = 8) in;
 
diff --git a/kompute/op_mul_mat_q4_1.comp b/kompute/op_mul_mat_q4_1.comp
index 8bdf810a1..fb7b051b8 100644
--- a/kompute/op_mul_mat_q4_1.comp
+++ b/kompute/op_mul_mat_q4_1.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 8, local_size_y = 8) in;
 
diff --git a/kompute/op_mulrow.comp b/kompute/op_mulrow.comp
index 3defd0a5f..498dbdfcd 100644
--- a/kompute/op_mulrow.comp
+++ b/kompute/op_mulrow.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_norm.comp b/kompute/op_norm.comp
index ec0a8568d..4b2db25e3 100644
--- a/kompute/op_norm.comp
+++ b/kompute/op_norm.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 #define nth 256
 
diff --git a/kompute/op_relu.comp b/kompute/op_relu.comp
index bc2c31f43..41f46be96 100644
--- a/kompute/op_relu.comp
+++ b/kompute/op_relu.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_rmsnorm.comp b/kompute/op_rmsnorm.comp
index 784713c36..dd2c5cdde 100644
--- a/kompute/op_rmsnorm.comp
+++ b/kompute/op_rmsnorm.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 #define nth 256
 
diff --git a/kompute/op_rope.comp b/kompute/op_rope.comp
index ca6bb6831..3fa84f579 100644
--- a/kompute/op_rope.comp
+++ b/kompute/op_rope.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_scale.comp b/kompute/op_scale.comp
index f537121a4..8530aaf3e 100644
--- a/kompute/op_scale.comp
+++ b/kompute/op_scale.comp
@@ -8,122 +8,8 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
+#include "common.comp"
 
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
 layout(local_size_x = 1) in;
 
 layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; };
diff --git a/kompute/op_silu.comp b/kompute/op_silu.comp
index 90c034ac7..c5acac281 100644
--- a/kompute/op_silu.comp
+++ b/kompute/op_silu.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 layout(local_size_x = 1) in;
 
diff --git a/kompute/op_softmax.comp b/kompute/op_softmax.comp
index ce0e71924..e936d8f68 100644
--- a/kompute/op_softmax.comp
+++ b/kompute/op_softmax.comp
@@ -8,122 +8,7 @@
 
 #version 450
 
-#extension GL_EXT_shader_16bit_storage: require
-#extension GL_EXT_shader_8bit_storage: require
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int8: require
-#extension GL_EXT_shader_explicit_arithmetic_types_int16: require
-#extension GL_EXT_control_flow_attributes: enable
-
-#define QK4_0 32
-#define QR4_0 2
-#define QK4_1 32
-
-#define GELU_COEF_A 0.044715
-#define SQRT_2_OVER_PI 0.79788456080286535587989211986876
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-#define BM 128
-#define BN 128
-#define BK 8
-#define TM 8
-#define TN 8
-
-#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx])
-#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx)
-#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx])
-#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx)
-
-#define sizeof_block_q4_0 0x12
-#define sizeof_block_q4_1 0x14
-struct block_q4_0 {
-    float16_t d;
-    uint8_t qs[QK4_0 / 2];
-};
-struct block_q4_1 {
-    float16_t d;
-    float16_t m;
-    uint8_t qs[QK4_1 / 2];
-};
-
-#ifndef QK_K
-#define QK_K 256
-#endif
-
-#if QK_K == 256
-#define K_SCALE_SIZE 12
-#else
-#define K_SCALE_SIZE 4
-#endif
-
-struct block_q2_K {
-    uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
-    uint8_t qs[QK_K/4];      // quants
-    float16_t d;           // super-block scale for quantized scales
-    float16_t dmin;        // super-block scale for quantized mins
-};
-// 84 bytes / block
-
-struct block_q3_K {
-    uint8_t hmask[QK_K/8];     // quants - high bit
-    uint8_t qs[QK_K/4];        // quants - low 2 bits
-#if QK_K == 64
-    uint8_t scales[2];
-#else
-    uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
-#endif
-    float16_t d;             // super-block scale
-};
-
-#if QK_K == 64
-typedef struct {
-    float16_t    d[2];          // super-block scales/mins
-    uint8_t scales[2];
-    uint8_t qs[QK_K/2];    // 4-bit quants
-} block_q4_K;
-#else
-struct block_q4_K {
-    float16_t d;             // super-block scale for quantized scales
-    float16_t dmin;          // super-block scale for quantized mins
-    uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
-    uint8_t qs[QK_K/2];        // 4--bit quants
-};
-#endif
-
-#if QK_K == 64
-struct block_q5_K {
-    float16_t  d;                     // super-block scales/mins
-    int8_t  scales[QK_K/16];     // 8-bit block scales
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-#else
-struct block_q5_K {
-    float16_t d;                      // super-block scale for quantized scales
-    float16_t dmin;                   // super-block scale for quantized mins
-    uint8_t scales[3*QK_K/64];   // scales and mins, quantized with 6 bits
-    uint8_t qh[QK_K/8];          // quants, high bit
-    uint8_t qs[QK_K/2];          // quants, low 4 bits
-};
-// 176 bytes / block
-#endif
-
-struct block_q6_K {
-    uint8_t ql[QK_K/2];      // quants, lower 4 bits
-    uint8_t qh[QK_K/4];      // quants, upper 2 bits
-    int8_t  scales[QK_K/16]; // scales, quantized with 8 bits
-    float16_t d;                  // super-block scale
-};
-// 210 bytes / block
+#include "common.comp"
 
 #define nth 32