mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-13 05:42:22 +01:00
remove unused
This commit is contained in:
parent
5e59660173
commit
2a01a7ce0d
@ -3729,9 +3729,6 @@ static inline __m128i get_scale_shuffle(int i) {
|
||||
//====================================== I2 ===============================================
|
||||
|
||||
void ggml_vec_dot_i2_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
|
||||
const int qk = QK8_0;
|
||||
const int nb = n / qk;
|
||||
|
||||
const uint8_t * restrict x = vx;
|
||||
const int8_t * restrict y = vy;
|
||||
|
||||
|
36
ggml.c
36
ggml.c
@ -1814,7 +1814,6 @@ inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x)
|
||||
// Element-wise negation: stores -x[i] into y[i] for every i in [0, n).
// Does nothing when n <= 0.
inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) {
    const float * src = x;
    float * out = y;
    for (int remaining = n; remaining > 0; --remaining) {
        *out++ = -(*src++);
    }
}
|
||||
// Element-wise product: stores x[i] * y[i] into z[i] for every i in [0, n).
// Elements are processed in increasing index order; does nothing when n <= 0.
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) {
    int idx = 0;
    while (idx < n) {
        const float lhs = x[idx];
        const float rhs = y[idx];
        z[idx] = lhs * rhs;
        ++idx;
    }
}
|
||||
// Element-wise quotient: stores x[i] / y[i] into z[i] for every i in [0, n).
// No check is made for zero divisors; does nothing when n <= 0.
inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) {
    int idx = 0;
    while (idx < n) {
        const float numerator   = x[idx];
        const float denominator = y[idx];
        z[idx] = numerator / denominator;
        ++idx;
    }
}
|
||||
// In-place scaling: multiplies each of the first n elements of y by the scalar x.
// Does nothing when n <= 0.
inline static void ggml_vec_mul_f32_bitnet (const int n, float * y, const float x) {
    float * cursor = y;
    for (int remaining = n; remaining > 0; --remaining) {
        *cursor = *cursor * x;
        ++cursor;
    }
}
|
||||
|
||||
static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) {
|
||||
assert(nrc == 1);
|
||||
@ -12434,7 +12433,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
|
||||
return;
|
||||
}
|
||||
|
||||
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
||||
void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
||||
size_t row_size = ggml_row_size(vec_dot_type, ne10);
|
||||
if (src0->type == 31) {
|
||||
row_size = ne10;
|
||||
@ -12454,7 +12453,17 @@ static void ggml_compute_forward_mul_mat_one_chunk(
|
||||
float tmp[32];
|
||||
uint8_t *i_weight = (uint8_t*) (src0->data);
|
||||
float * scale = (float * )((i_weight) + (ne00 * ne01 / 4));
|
||||
float* act_scales = (float*) ((char *) wdata + ((ne11*nb11) / 4));
|
||||
float * act_scales = (float*) ((char *) wdata + ((ne11*nb11) / 4));
|
||||
// printf("src0->name:%s\n", src0->name);
|
||||
// printf("src1->name:%s\n", src1->name);
|
||||
// printf("ne03:%ld\n", ne03);
|
||||
// printf("ne02:%ld\n", ne02);
|
||||
// printf("ne01:%ld\n", ne01);
|
||||
// printf("ne00:%ld\n", ne00);
|
||||
// printf("ne13:%ld\n", ne13);
|
||||
// printf("ne12:%ld\n", ne12);
|
||||
// printf("ne11:%ld\n", ne11);
|
||||
// printf("ne10:%ld\n", ne10);
|
||||
|
||||
for (int64_t iir1 = ir1_start; iir1 < ir1_end; iir1 += blck_1) {
|
||||
for (int64_t iir0 = ir0_start; iir0 < ir0_end; iir0 += blck_0) {
|
||||
@ -12472,7 +12481,9 @@ static void ggml_compute_forward_mul_mat_one_chunk(
|
||||
const int64_t i3 = i13;
|
||||
|
||||
const char * src0_row = (const char*)src0->data + (0 + i02 * nb02 + i03 * nb03);
|
||||
|
||||
// if (src0->type == 31) {
|
||||
// printf("src0->%ld\n", (0 + i02 * nb02 + i03 * nb03));
|
||||
// }
|
||||
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
|
||||
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
|
||||
// the original src1 data pointer, so we should index using the indices directly
|
||||
@ -12481,22 +12492,29 @@ static void ggml_compute_forward_mul_mat_one_chunk(
|
||||
(src1_cont || src1->type != vec_dot_type
|
||||
? (i11 + i12 * ne11 + i13 * ne12 * ne11) * row_size
|
||||
: (i11 * nb11 + i12 * nb12 + i13 * nb13));
|
||||
// if (src0->type == 31) {
|
||||
// printf("src1->%ld\n", (i11 + i12 * ne11 + i13 * ne12 * ne11) * row_size);
|
||||
// }
|
||||
float * dst_col = (float*)((char*)dst->data + (i1 * nb1 + i2 * nb2 + i3 * nb3));
|
||||
|
||||
//for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir0_end; ++ir0) {
|
||||
// vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
|
||||
//}
|
||||
|
||||
// if (src0->type == 31) {
|
||||
// printf("dst->%ld\n", (i1 * nb1 + i2 * nb2 + i3 * nb3));
|
||||
// }
|
||||
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir0_end; ir0 += num_rows_per_vec_dot) {
|
||||
if (src0->type == 31) {
|
||||
// printf("row->%ld\n", (ir0 * nb01 / 4));
|
||||
vec_dot(ne00, &tmp[ir0 - iir0], (num_rows_per_vec_dot > 1 ? 16 : 0), src0_row + ir0 * nb01 / 4, (num_rows_per_vec_dot > 1 ? nb01 : 0), src1_col, (num_rows_per_vec_dot > 1 ? src1_col_stride : 0), num_rows_per_vec_dot);
|
||||
tmp[ir0 - iir0] = tmp[ir0 - iir0] * (*scale) * (act_scales[i11]);
|
||||
}else {
|
||||
tmp[ir0 - iir0] = tmp[ir0 - iir0] / (act_scales[i11]) * (*scale);
|
||||
} else {
|
||||
vec_dot(ne00, &tmp[ir0 - iir0], (num_rows_per_vec_dot > 1 ? 16 : 0), src0_row + ir0 * nb01, (num_rows_per_vec_dot > 1 ? nb01 : 0), src1_col, (num_rows_per_vec_dot > 1 ? src1_col_stride : 0), num_rows_per_vec_dot);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// printf("num_rows_per_vec_dot->%ld\n", num_rows_per_vec_dot);
|
||||
// printf("iir0->%ld\n", iir0);
|
||||
for (int cn = 0; cn < num_rows_per_vec_dot; ++cn) {
|
||||
memcpy(&dst_col[iir0 + cn * nb1 / nb0], tmp + (cn * 16), (MIN(iir0 + blck_0, ir0_end) - iir0) * sizeof(float));
|
||||
}
|
||||
@ -12561,7 +12579,7 @@ static void ggml_compute_forward_bitnet_mul_mat(
|
||||
float rowmax = 0.00001;
|
||||
ggml_vec_absmaxclamp_f32(ne10, &rowmax, (float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13), 0.00001);
|
||||
float s = 127 / rowmax;
|
||||
act_scales[i11] = 1/s;
|
||||
act_scales[i11] = s;
|
||||
ggml_vec_scaleroundclamp_f32_v2(ne10,
|
||||
(float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13),
|
||||
(int8_t*) ((char *) wdata + ((i11*nb11 + i12*nb12 + i13*nb13) / 4)),
|
||||
|
@ -3192,9 +3192,7 @@ struct llama_model_loader {
|
||||
|
||||
llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
|
||||
const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
|
||||
printf("name:%s\n", name);
|
||||
offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
|
||||
printf("offs:%ld\n", offs + ggml_nbytes(tensor));
|
||||
if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
|
||||
throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user