From f5fddb6d24c4ac4aae1a9bcd9ec222f739b0fc65 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin <git@compilade.net>
Date: Fri, 10 Jan 2025 14:52:49 -0500
Subject: [PATCH] ggml-cuda : remove some superfluous comments for TQ2_0 tile
 loading

---
 ggml/src/ggml-cuda/mmq.cuh | 2 --
 1 file changed, 2 deletions(-)
diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh
index 91c6d68ac..8d1370184 100644
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -1848,13 +1848,11 @@ template <int mmq_y, int nwarps, bool need_check> static __device__ __forceinlin
 #ifdef INT8_MMA_AVAILABLE
             x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k] = q;
 #else
-            // NOTE: this might assume WARP_SIZE is >= 32
             x_qs[i*(2*WARP_SIZE + 1) + k] = q;
 #endif // INT8_MMA_AVAILABLE
         }
     }
 
-    // TODO: does this work with WARP_SIZE != 32?
 #pragma unroll
     for (int i0 = 0; i0 < mmq_y; i0 += nwarps * WARP_SIZE/(QI2_0/2)) {
         int i = i0 + threadIdx.y*(2*WARP_SIZE/QI2_0) + threadIdx.x/(QI2_0/2);