diff --git a/Makefile b/Makefile index 60c8922d4..951265f99 100644 --- a/Makefile +++ b/Makefile @@ -176,6 +176,7 @@ ifdef LLAMA_CUDA OBJS += ggml-cuda.o NVCC = nvcc NVCCFLAGS = --forward-unknown-to-host-compiler + NVCCV := $(shell $(NVCC) --version | tail -n 1) ifdef LLAMA_DEBUG NVCCFLAGS += -lineinfo endif # LLAMA_DEBUG diff --git a/ggml-cuda-kern.h b/ggml-cuda-kern.h index 7b279f02c..23551c648 100644 --- a/ggml-cuda-kern.h +++ b/ggml-cuda-kern.h @@ -14,7 +14,7 @@ template<> struct vec2_t_impl { typedef float2 type; }; template using vec2_t = typename vec2_t_impl::type; template inline __host__ __device__ vec2_t make_vec2_t(const T & x, const T & y); -template<> inline __host__ __device__ vec2_t make_vec2_t(const half & x, const half & y) { return __halves2half2(x, y); } +template<> inline __host__ __device__ vec2_t make_vec2_t(const half & x, const half & y) { return make_half2 (x, y); } template<> inline __host__ __device__ vec2_t make_vec2_t(const float & x, const float & y) { return make_float2(x, y); } // the cuda headers define operators for half2, but not for float2