diff --git a/ggml-common.h b/ggml-common.h index 66d984b5a..409fcf29e 100644 --- a/ggml-common.h +++ b/ggml-common.h @@ -1023,70 +1023,70 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512) GGML_TABLE_END() GGML_TABLE_BEGIN(uint32_t, i2s_i8s, 256) -0x00000000, 0x01000000, 0x00000000, 0xff000000, -0x00010000, 0x01010000, 0x00010000, 0xff010000, -0x00000000, 0x01000000, 0x00000000, 0xff000000, -0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000, -0x00000100, 0x01000100, 0x00000100, 0xff000100, -0x00010100, 0x01010100, 0x00010100, 0xff010100, -0x00000100, 0x01000100, 0x00000100, 0xff000100, -0x00ff0100, 0x01ff0100, 0x00ff0100, 0xffff0100, -0x00000000, 0x01000000, 0x00000000, 0xff000000, -0x00010000, 0x01010000, 0x00010000, 0xff010000, -0x00000000, 0x01000000, 0x00000000, 0xff000000, -0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000, -0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00, -0x0001ff00, 0x0101ff00, 0x0001ff00, 0xff01ff00, -0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00, -0x00ffff00, 0x01ffff00, 0x00ffff00, 0xffffff00, -0x00000001, 0x01000001, 0x00000001, 0xff000001, -0x00010001, 0x01010001, 0x00010001, 0xff010001, -0x00000001, 0x01000001, 0x00000001, 0xff000001, -0x00ff0001, 0x01ff0001, 0x00ff0001, 0xffff0001, -0x00000101, 0x01000101, 0x00000101, 0xff000101, -0x00010101, 0x01010101, 0x00010101, 0xff010101, -0x00000101, 0x01000101, 0x00000101, 0xff000101, -0x00ff0101, 0x01ff0101, 0x00ff0101, 0xffff0101, -0x00000001, 0x01000001, 0x00000001, 0xff000001, -0x00010001, 0x01010001, 0x00010001, 0xff010001, -0x00000001, 0x01000001, 0x00000001, 0xff000001, -0x00ff0001, 0x01ff0001, 0x00ff0001, 0xffff0001, -0x0000ff01, 0x0100ff01, 0x0000ff01, 0xff00ff01, -0x0001ff01, 0x0101ff01, 0x0001ff01, 0xff01ff01, -0x0000ff01, 0x0100ff01, 0x0000ff01, 0xff00ff01, -0x00ffff01, 0x01ffff01, 0x00ffff01, 0xffffff01, -0x00000000, 0x01000000, 0x00000000, 0xff000000, -0x00010000, 0x01010000, 0x00010000, 0xff010000, -0x00000000, 0x01000000, 0x00000000, 0xff000000, -0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000, -0x00000100, 0x01000100, 0x00000100, 0xff000100, -0x00010100, 0x01010100, 0x00010100, 0xff010100, -0x00000100, 0x01000100, 0x00000100, 0xff000100, -0x00ff0100, 0x01ff0100, 0x00ff0100, 0xffff0100, -0x00000000, 0x01000000, 0x00000000, 0xff000000, -0x00010000, 0x01010000, 0x00010000, 0xff010000, -0x00000000, 0x01000000, 0x00000000, 0xff000000, -0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000, -0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00, -0x0001ff00, 0x0101ff00, 0x0001ff00, 0xff01ff00, -0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00, -0x00ffff00, 0x01ffff00, 0x00ffff00, 0xffffff00, -0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff, -0x000100ff, 0x010100ff, 0x000100ff, 0xff0100ff, -0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff, -0x00ff00ff, 0x01ff00ff, 0x00ff00ff, 0xffff00ff, -0x000001ff, 0x010001ff, 0x000001ff, 0xff0001ff, -0x000101ff, 0x010101ff, 0x000101ff, 0xff0101ff, -0x000001ff, 0x010001ff, 0x000001ff, 0xff0001ff, -0x00ff01ff, 0x01ff01ff, 0x00ff01ff, 0xffff01ff, -0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff, -0x000100ff, 0x010100ff, 0x000100ff, 0xff0100ff, -0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff, -0x00ff00ff, 0x01ff00ff, 0x00ff00ff, 0xffff00ff, -0x0000ffff, 0x0100ffff, 0x0000ffff, 0xff00ffff, -0x0001ffff, 0x0101ffff, 0x0001ffff, 0xff01ffff, -0x0000ffff, 0x0100ffff, 0x0000ffff, 0xff00ffff, -0x00ffffff, 0x01ffffff, 0x00ffffff, 0xffffffff, + 0x00000000, 0x01000000, 0x00000000, 0xff000000, + 0x00010000, 0x01010000, 0x00010000, 0xff010000, + 0x00000000, 0x01000000, 0x00000000, 0xff000000, + 0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000, + 0x00000100, 0x01000100, 0x00000100, 0xff000100, + 0x00010100, 0x01010100, 0x00010100, 0xff010100, + 0x00000100, 0x01000100, 0x00000100, 0xff000100, + 0x00ff0100, 0x01ff0100, 0x00ff0100, 0xffff0100, + 0x00000000, 0x01000000, 0x00000000, 0xff000000, + 0x00010000, 0x01010000, 0x00010000, 0xff010000, + 0x00000000, 0x01000000, 0x00000000, 0xff000000, + 0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000, + 0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00, + 0x0001ff00, 0x0101ff00, 0x0001ff00, 0xff01ff00, + 0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00, + 0x00ffff00, 0x01ffff00, 0x00ffff00, 0xffffff00, + 0x00000001, 0x01000001, 0x00000001, 0xff000001, + 0x00010001, 0x01010001, 0x00010001, 0xff010001, + 0x00000001, 0x01000001, 0x00000001, 0xff000001, + 0x00ff0001, 0x01ff0001, 0x00ff0001, 0xffff0001, + 0x00000101, 0x01000101, 0x00000101, 0xff000101, + 0x00010101, 0x01010101, 0x00010101, 0xff010101, + 0x00000101, 0x01000101, 0x00000101, 0xff000101, + 0x00ff0101, 0x01ff0101, 0x00ff0101, 0xffff0101, + 0x00000001, 0x01000001, 0x00000001, 0xff000001, + 0x00010001, 0x01010001, 0x00010001, 0xff010001, + 0x00000001, 0x01000001, 0x00000001, 0xff000001, + 0x00ff0001, 0x01ff0001, 0x00ff0001, 0xffff0001, + 0x0000ff01, 0x0100ff01, 0x0000ff01, 0xff00ff01, + 0x0001ff01, 0x0101ff01, 0x0001ff01, 0xff01ff01, + 0x0000ff01, 0x0100ff01, 0x0000ff01, 0xff00ff01, + 0x00ffff01, 0x01ffff01, 0x00ffff01, 0xffffff01, + 0x00000000, 0x01000000, 0x00000000, 0xff000000, + 0x00010000, 0x01010000, 0x00010000, 0xff010000, + 0x00000000, 0x01000000, 0x00000000, 0xff000000, + 0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000, + 0x00000100, 0x01000100, 0x00000100, 0xff000100, + 0x00010100, 0x01010100, 0x00010100, 0xff010100, + 0x00000100, 0x01000100, 0x00000100, 0xff000100, + 0x00ff0100, 0x01ff0100, 0x00ff0100, 0xffff0100, + 0x00000000, 0x01000000, 0x00000000, 0xff000000, + 0x00010000, 0x01010000, 0x00010000, 0xff010000, + 0x00000000, 0x01000000, 0x00000000, 0xff000000, + 0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000, + 0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00, + 0x0001ff00, 0x0101ff00, 0x0001ff00, 0xff01ff00, + 0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00, + 0x00ffff00, 0x01ffff00, 0x00ffff00, 0xffffff00, + 0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff, + 0x000100ff, 0x010100ff, 0x000100ff, 0xff0100ff, + 0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff, + 0x00ff00ff, 0x01ff00ff, 0x00ff00ff, 0xffff00ff, + 0x000001ff, 0x010001ff, 0x000001ff, 0xff0001ff, + 0x000101ff, 0x010101ff, 0x000101ff, 0xff0101ff, + 0x000001ff, 0x010001ff, 0x000001ff, 0xff0001ff, + 0x00ff01ff, 0x01ff01ff, 0x00ff01ff, 0xffff01ff, + 0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff, + 0x000100ff, 0x010100ff, 0x000100ff, 0xff0100ff, + 0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff, + 0x00ff00ff, 0x01ff00ff, 0x00ff00ff, 0xffff00ff, + 0x0000ffff, 0x0100ffff, 0x0000ffff, 0xff00ffff, + 0x0001ffff, 0x0101ffff, 0x0001ffff, 0xff01ffff, + 0x0000ffff, 0x0100ffff, 0x0000ffff, 0xff00ffff, + 0x00ffffff, 0x01ffffff, 0x00ffffff, 0xffffffff, GGML_TABLE_END() #define NGRID_IQ1S 2048 diff --git a/ggml-quants.c b/ggml-quants.c index 665e381a3..4b5209279 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -3799,60 +3799,61 @@ void ggml_vec_dot_i2_i8_s(int n, float * restrict s, size_t bs, const void * res UNUSED(by); UNUSED(nrc); -// TODO -// #if defined(__AVX2__) -// __m256i accu = _mm256_setzero_si256(); +#if defined(__AVX2__) + __m256i accu = _mm256_setzero_si256(); -// for (int i=0; i