llama.cpp/ggml-cuda.h

// Give the declarations below C linkage when this header is compiled as C++,
// so the same prototypes can be used from both C and C++ translation units.
#ifdef  __cplusplus
extern "C" {
#endif

// Row dequantization entry points, one per format named in the function
// (q4_0, q4_1, q4_2). All three share the same signature and return nothing.
//
// NOTE(review): the definitions are not part of this header; the parameter
// meanings below are inferred from the names only -- confirm against the
// implementation:
//   vx     - presumably the quantized input data (type-erased pointer)
//   y      - presumably the float output buffer
//   k      - presumably the number of values to dequantize
//   stream - presumably the CUDA stream the work is issued on
//
// NOTE(review): cudaStream_t is not declared here and no #include is visible
// in this header, so the including file apparently has to bring the CUDA
// runtime declarations into scope before including it.
void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);

// Close the extern "C" block opened above (C++ only).
#ifdef  __cplusplus
}
#endif
Improve cuBLAS performance by dequantizing on the GPU (#1065) 2023-04-20 03:14:14 +02:00
			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

			`void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);`
			`void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);`
			`void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);`

			`#ifdef __cplusplus`
			`}`
			`#endif`