remove unused

This commit is contained in:
Eddie-Wang1120 2024-06-07 18:29:59 +08:00
parent 5e59660173
commit 2a01a7ce0d
3 changed files with 38 additions and 25 deletions

View File

@ -3729,9 +3729,6 @@ static inline __m128i get_scale_shuffle(int i) {
//====================================== I2 =============================================== //====================================== I2 ===============================================
void ggml_vec_dot_i2_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { void ggml_vec_dot_i2_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
const int qk = QK8_0;
const int nb = n / qk;
const uint8_t * restrict x = vx; const uint8_t * restrict x = vx;
const int8_t * restrict y = vy; const int8_t * restrict y = vy;

32
ggml.c
View File

@ -1814,7 +1814,6 @@ inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x)
inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = -x[i]; } inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = -x[i]; }
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; } inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; } inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; }
inline static void ggml_vec_mul_f32_bitnet (const int n, float * y, const float x) { for (int i = 0; i < n; ++i) y[i] = y[i] * x; }
static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) { static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) {
assert(nrc == 1); assert(nrc == 1);
@ -12434,7 +12433,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
return; return;
} }
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata; void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
size_t row_size = ggml_row_size(vec_dot_type, ne10); size_t row_size = ggml_row_size(vec_dot_type, ne10);
if (src0->type == 31) { if (src0->type == 31) {
row_size = ne10; row_size = ne10;
@ -12455,6 +12454,16 @@ static void ggml_compute_forward_mul_mat_one_chunk(
uint8_t *i_weight = (uint8_t*) (src0->data); uint8_t *i_weight = (uint8_t*) (src0->data);
float * scale = (float * )((i_weight) + (ne00 * ne01 / 4)); float * scale = (float * )((i_weight) + (ne00 * ne01 / 4));
float * act_scales = (float*) ((char *) wdata + ((ne11*nb11) / 4)); float * act_scales = (float*) ((char *) wdata + ((ne11*nb11) / 4));
// printf("src0->name:%s\n", src0->name);
// printf("src1->name:%s\n", src1->name);
// printf("ne03:%ld\n", ne03);
// printf("ne02:%ld\n", ne02);
// printf("ne01:%ld\n", ne01);
// printf("ne00:%ld\n", ne00);
// printf("ne13:%ld\n", ne13);
// printf("ne12:%ld\n", ne12);
// printf("ne11:%ld\n", ne11);
// printf("ne10:%ld\n", ne10);
for (int64_t iir1 = ir1_start; iir1 < ir1_end; iir1 += blck_1) { for (int64_t iir1 = ir1_start; iir1 < ir1_end; iir1 += blck_1) {
for (int64_t iir0 = ir0_start; iir0 < ir0_end; iir0 += blck_0) { for (int64_t iir0 = ir0_start; iir0 < ir0_end; iir0 += blck_0) {
@ -12472,7 +12481,9 @@ static void ggml_compute_forward_mul_mat_one_chunk(
const int64_t i3 = i13; const int64_t i3 = i13;
const char * src0_row = (const char*)src0->data + (0 + i02 * nb02 + i03 * nb03); const char * src0_row = (const char*)src0->data + (0 + i02 * nb02 + i03 * nb03);
// if (src0->type == 31) {
// printf("src0->%ld\n", (0 + i02 * nb02 + i03 * nb03));
// }
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides // desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using // if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
// the original src1 data pointer, so we should index using the indices directly // the original src1 data pointer, so we should index using the indices directly
@ -12481,22 +12492,29 @@ static void ggml_compute_forward_mul_mat_one_chunk(
(src1_cont || src1->type != vec_dot_type (src1_cont || src1->type != vec_dot_type
? (i11 + i12 * ne11 + i13 * ne12 * ne11) * row_size ? (i11 + i12 * ne11 + i13 * ne12 * ne11) * row_size
: (i11 * nb11 + i12 * nb12 + i13 * nb13)); : (i11 * nb11 + i12 * nb12 + i13 * nb13));
// if (src0->type == 31) {
// printf("src1->%ld\n", (i11 + i12 * ne11 + i13 * ne12 * ne11) * row_size);
// }
float * dst_col = (float*)((char*)dst->data + (i1 * nb1 + i2 * nb2 + i3 * nb3)); float * dst_col = (float*)((char*)dst->data + (i1 * nb1 + i2 * nb2 + i3 * nb3));
//for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir0_end; ++ir0) { //for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir0_end; ++ir0) {
// vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col); // vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
//} //}
// if (src0->type == 31) {
// printf("dst->%ld\n", (i1 * nb1 + i2 * nb2 + i3 * nb3));
// }
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir0_end; ir0 += num_rows_per_vec_dot) { for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir0_end; ir0 += num_rows_per_vec_dot) {
if (src0->type == 31) { if (src0->type == 31) {
// printf("row->%ld\n", (ir0 * nb01 / 4));
vec_dot(ne00, &tmp[ir0 - iir0], (num_rows_per_vec_dot > 1 ? 16 : 0), src0_row + ir0 * nb01 / 4, (num_rows_per_vec_dot > 1 ? nb01 : 0), src1_col, (num_rows_per_vec_dot > 1 ? src1_col_stride : 0), num_rows_per_vec_dot); vec_dot(ne00, &tmp[ir0 - iir0], (num_rows_per_vec_dot > 1 ? 16 : 0), src0_row + ir0 * nb01 / 4, (num_rows_per_vec_dot > 1 ? nb01 : 0), src1_col, (num_rows_per_vec_dot > 1 ? src1_col_stride : 0), num_rows_per_vec_dot);
tmp[ir0 - iir0] = tmp[ir0 - iir0] * (*scale) * (act_scales[i11]); tmp[ir0 - iir0] = tmp[ir0 - iir0] / (act_scales[i11]) * (*scale);
} else { } else {
vec_dot(ne00, &tmp[ir0 - iir0], (num_rows_per_vec_dot > 1 ? 16 : 0), src0_row + ir0 * nb01, (num_rows_per_vec_dot > 1 ? nb01 : 0), src1_col, (num_rows_per_vec_dot > 1 ? src1_col_stride : 0), num_rows_per_vec_dot); vec_dot(ne00, &tmp[ir0 - iir0], (num_rows_per_vec_dot > 1 ? 16 : 0), src0_row + ir0 * nb01, (num_rows_per_vec_dot > 1 ? nb01 : 0), src1_col, (num_rows_per_vec_dot > 1 ? src1_col_stride : 0), num_rows_per_vec_dot);
} }
} }
// printf("num_rows_per_vec_dot->%ld\n", num_rows_per_vec_dot);
// printf("iir0->%ld\n", iir0);
for (int cn = 0; cn < num_rows_per_vec_dot; ++cn) { for (int cn = 0; cn < num_rows_per_vec_dot; ++cn) {
memcpy(&dst_col[iir0 + cn * nb1 / nb0], tmp + (cn * 16), (MIN(iir0 + blck_0, ir0_end) - iir0) * sizeof(float)); memcpy(&dst_col[iir0 + cn * nb1 / nb0], tmp + (cn * 16), (MIN(iir0 + blck_0, ir0_end) - iir0) * sizeof(float));
} }
@ -12561,7 +12579,7 @@ static void ggml_compute_forward_bitnet_mul_mat(
float rowmax = 0.00001; float rowmax = 0.00001;
ggml_vec_absmaxclamp_f32(ne10, &rowmax, (float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13), 0.00001); ggml_vec_absmaxclamp_f32(ne10, &rowmax, (float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13), 0.00001);
float s = 127 / rowmax; float s = 127 / rowmax;
act_scales[i11] = 1/s; act_scales[i11] = s;
ggml_vec_scaleroundclamp_f32_v2(ne10, ggml_vec_scaleroundclamp_f32_v2(ne10,
(float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13), (float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13),
(int8_t*) ((char *) wdata + ((i11*nb11 + i12*nb12 + i13*nb13) / 4)), (int8_t*) ((char *) wdata + ((i11*nb11 + i12*nb12 + i13*nb13) / 4)),

View File

@ -3192,9 +3192,7 @@ struct llama_model_loader {
llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) { llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
const int tensor_idx = gguf_find_tensor(gguf_ctx, name); const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
printf("name:%s\n", name);
offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx); offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
printf("offs:%ld\n", offs + ggml_nbytes(tensor));
if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) { if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name)); throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
} }