mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 05:48:47 +01:00
cuda : get_row_rounding F32 (#4095)
* Fix #4017 * Update ggml-cuda.cu Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * Update ggml-cuda.cu Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> --------- Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
This commit is contained in:
parent
4f447a4833
commit
b83e149ec6
@ -6356,6 +6356,7 @@ static int64_t get_row_rounding(ggml_type type) {
|
|||||||
case GGML_TYPE_Q8_0:
|
case GGML_TYPE_Q8_0:
|
||||||
return max_compute_capability >= CC_RDNA2 ? 128 : 64;
|
return max_compute_capability >= CC_RDNA2 ? 128 : 64;
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
|
case GGML_TYPE_F32:
|
||||||
return 1;
|
return 1;
|
||||||
case GGML_TYPE_Q2_K:
|
case GGML_TYPE_Q2_K:
|
||||||
return max_compute_capability >= CC_RDNA2 ? 128 : 32;
|
return max_compute_capability >= CC_RDNA2 ? 128 : 32;
|
||||||
@ -6378,6 +6379,7 @@ static int64_t get_row_rounding(ggml_type type) {
|
|||||||
case GGML_TYPE_Q8_0:
|
case GGML_TYPE_Q8_0:
|
||||||
return 64;
|
return 64;
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
|
case GGML_TYPE_F32:
|
||||||
return 1;
|
return 1;
|
||||||
case GGML_TYPE_Q2_K:
|
case GGML_TYPE_Q2_K:
|
||||||
case GGML_TYPE_Q3_K:
|
case GGML_TYPE_Q3_K:
|
||||||
|
Loading…
Reference in New Issue
Block a user