diff --git a/ggml/src/ggml-cpu.c b/ggml/src/ggml-cpu.c index 4b8ffb629..09ba49b13 100644 --- a/ggml/src/ggml-cpu.c +++ b/ggml/src/ggml-cpu.c @@ -304,6 +304,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = { .nrows = 1, }, [GGML_TYPE_Q8_0] = { + .from_float_to_mat = quantize_mat_q8_0, .vec_dot = ggml_vec_dot_q8_0_q8_0, .vec_dot_type = GGML_TYPE_Q8_0, #if defined (__ARM_FEATURE_MATMUL_INT8) @@ -13692,9 +13693,7 @@ void ggml_cpu_init(void) { uint16_t u16; ggml_fp16_t fp16; } u = {i}; - // FIXME: this table is used in conversion functions outside of compute - // current code depends on ggml_init initializing this table - float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16); + float f = GGML_FP16_TO_FP32(u.fp16); ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f)); ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f)); } diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 7dc3340a1..1ccf78d98 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -220,8 +220,10 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi void * ggml_aligned_malloc(size_t size) { + const int alignment = 64; + #if defined(_MSC_VER) || defined(__MINGW32__) - return _aligned_malloc(size, TENSOR_ALIGNMENT); + return _aligned_malloc(size, alignment); #else if (size == 0) { GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); @@ -229,8 +231,9 @@ void * ggml_aligned_malloc(size_t size) { } void * aligned_memory = NULL; #ifdef GGML_USE_CPU_HBM - int result = hbw_posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size); + int result = hbw_posix_memalign(&aligned_memory, alignment, size); #elif TARGET_OS_OSX + GGML_UNUSED(alignment); kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); int result = EFAULT; switch (alloc_status) { @@ -248,7 +251,7 @@ void * ggml_aligned_malloc(size_t size) { break; } #else - int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size); + int result = posix_memalign(&aligned_memory, alignment, size); #endif if (result != 0) { // Handle allocation failure