mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 05:17:21 +01:00
parent
9a818f7c42
commit
4dccb38d9a
@ -3841,8 +3841,8 @@ void dequantize_q3_K(device const block_q3_K *xb, short il, thread type4x4 & reg
|
|||||||
uint16_t scale_2 = scales[il%8], scale_1 = scales[8 + il%4];
|
uint16_t scale_2 = scales[il%8], scale_1 = scales[8 + il%4];
|
||||||
int16_t dl_int = (il/4)&1 ? (scale_2&kmask2) | ((scale_1&kmask1) << 2)
|
int16_t dl_int = (il/4)&1 ? (scale_2&kmask2) | ((scale_1&kmask1) << 2)
|
||||||
: (scale_2&kmask2) | ((scale_1&kmask1) << 4);
|
: (scale_2&kmask2) | ((scale_1&kmask1) << 4);
|
||||||
half dl = il<8 ? d_all * (dl_int - 32.h) : d_all * (dl_int / 16.h - 32.h);
|
float dl = il<8 ? d_all * (dl_int - 32.f) : d_all * (dl_int / 16.f - 32.f);
|
||||||
const half ml = 4.h * dl;
|
const float ml = 4.f * dl;
|
||||||
|
|
||||||
il = (il/2) & 3;
|
il = (il/2) & 3;
|
||||||
const half coef = il>1 ? (il>2 ? 1/64.h : 1/16.h) : (il>0 ? 1/4.h : 1.h);
|
const half coef = il>1 ? (il>2 ? 1/64.h : 1/16.h) : (il>0 ? 1/4.h : 1.h);
|
||||||
@ -3909,7 +3909,7 @@ void dequantize_q5_K(device const block_q5_K *xb, short il, thread type4x4 & reg
|
|||||||
uint8_t ul = 1 << (il/2);
|
uint8_t ul = 1 << (il/2);
|
||||||
il = il & 3;
|
il = il & 3;
|
||||||
const uchar2 sc = get_scale_min_k4_just2(is, il/2, xb->scales);
|
const uchar2 sc = get_scale_min_k4_just2(is, il/2, xb->scales);
|
||||||
const float d = il < 2 ? xb->d : xb->d / 16.h;
|
const float d = il < 2 ? xb->d : xb->d / 16.f;
|
||||||
const float min = xb->dmin;
|
const float min = xb->dmin;
|
||||||
const float dl = d * sc[0];
|
const float dl = d * sc[0];
|
||||||
const float ml = min * sc[1];
|
const float ml = min * sc[1];
|
||||||
@ -3942,17 +3942,17 @@ void dequantize_q6_K(device const block_q6_K *xb, short il, thread type4x4 & reg
|
|||||||
#if QK_K == 256
|
#if QK_K == 256
|
||||||
ql = ql + 64*(il/8) + 32*((il/2)&1) + 16*(il&1);
|
ql = ql + 64*(il/8) + 32*((il/2)&1) + 16*(il&1);
|
||||||
qh = qh + 32*(il/8) + 16*(il&1);
|
qh = qh + 32*(il/8) + 16*(il&1);
|
||||||
half sc = scales[(il%2) + 2 * ((il/2))];
|
float sc = scales[(il%2) + 2 * ((il/2))];
|
||||||
il = (il/2) & 3;
|
il = (il/2) & 3;
|
||||||
#else
|
#else
|
||||||
ql = ql + 16 * (il&1);
|
ql = ql + 16 * (il&1);
|
||||||
half sc = scales[il];
|
float sc = scales[il];
|
||||||
#endif
|
#endif
|
||||||
const uint16_t kmask1 = il>1 ? (il>2 ? 192 : 48) : (il>0 ? 12 : 3);
|
const uint16_t kmask1 = il>1 ? (il>2 ? 192 : 48) : (il>0 ? 12 : 3);
|
||||||
const uint16_t kmask2 = il>1 ? 0xF0 : 0x0F;
|
const uint16_t kmask2 = il>1 ? 0xF0 : 0x0F;
|
||||||
const half coef = il>1 ? 1.f/16.h : 1.h;
|
const float coef = il>1 ? 1.f/16.f : 1.f;
|
||||||
const half ml = d_all * sc * 32.h;
|
const float ml = d_all * sc * 32.f;
|
||||||
const half dl = d_all * sc * coef;
|
const float dl = d_all * sc * coef;
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
const half q = il&1 ? ((ql[i] & kmask2) | ((qh[i] & kmask1) << 2))
|
const half q = il&1 ? ((ql[i] & kmask2) | ((qh[i] & kmask1) << 2))
|
||||||
: ((ql[i] & kmask2) | ((qh[i] & kmask1) << 4));
|
: ((ql[i] & kmask2) | ((qh[i] & kmask1) << 4));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user