mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 03:12:23 +01:00
parent
c50e400163
commit
cafcd4f895
@ -71,7 +71,7 @@ void free_random_uniform_distribution(struct random_uniform_distribution * rnd)
|
|||||||
|
|
||||||
struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct random_normal_distribution * rnd) {
|
struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct random_normal_distribution * rnd) {
|
||||||
float scale = 1.0f; // xavier
|
float scale = 1.0f; // xavier
|
||||||
switch (tensor->n_dims) {
|
switch (ggml_n_dims(tensor)) {
|
||||||
case 1:
|
case 1:
|
||||||
scale /= sqrtf((float) tensor->ne[0]);
|
scale /= sqrtf((float) tensor->ne[0]);
|
||||||
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
||||||
@ -119,7 +119,7 @@ struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd) {
|
struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd) {
|
||||||
switch (tensor->n_dims) {
|
switch (ggml_n_dims(tensor)) {
|
||||||
case 1:
|
case 1:
|
||||||
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
||||||
float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0]);
|
float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0]);
|
||||||
@ -183,25 +183,27 @@ float fclamp(const float v, const float min, const float max) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) {
|
void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) {
|
||||||
GGML_ASSERT(tensor->n_dims == 1);
|
|
||||||
GGML_ASSERT(tensor->ne[0] == ne0);
|
GGML_ASSERT(tensor->ne[0] == ne0);
|
||||||
|
GGML_ASSERT(tensor->ne[1] == 1);
|
||||||
|
GGML_ASSERT(tensor->ne[2] == 1);
|
||||||
|
GGML_ASSERT(tensor->ne[3] == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) {
|
void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) {
|
||||||
GGML_ASSERT(tensor->n_dims == 2);
|
|
||||||
GGML_ASSERT(tensor->ne[0] == ne0);
|
GGML_ASSERT(tensor->ne[0] == ne0);
|
||||||
GGML_ASSERT(tensor->ne[1] == ne1);
|
GGML_ASSERT(tensor->ne[1] == ne1);
|
||||||
|
GGML_ASSERT(tensor->ne[2] == 1);
|
||||||
|
GGML_ASSERT(tensor->ne[3] == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) {
|
void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) {
|
||||||
GGML_ASSERT(tensor->n_dims == 3);
|
|
||||||
GGML_ASSERT(tensor->ne[0] == ne0);
|
GGML_ASSERT(tensor->ne[0] == ne0);
|
||||||
GGML_ASSERT(tensor->ne[1] == ne1);
|
GGML_ASSERT(tensor->ne[1] == ne1);
|
||||||
GGML_ASSERT(tensor->ne[2] == ne2);
|
GGML_ASSERT(tensor->ne[2] == ne2);
|
||||||
|
GGML_ASSERT(tensor->ne[3] == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
|
void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
|
||||||
GGML_ASSERT(tensor->n_dims == 4);
|
|
||||||
GGML_ASSERT(tensor->ne[0] == ne0);
|
GGML_ASSERT(tensor->ne[0] == ne0);
|
||||||
GGML_ASSERT(tensor->ne[1] == ne1);
|
GGML_ASSERT(tensor->ne[1] == ne1);
|
||||||
GGML_ASSERT(tensor->ne[2] == ne2);
|
GGML_ASSERT(tensor->ne[2] == ne2);
|
||||||
@ -225,8 +227,8 @@ int64_t get_example_targets_batch(
|
|||||||
bool sample_random_offsets
|
bool sample_random_offsets
|
||||||
) {
|
) {
|
||||||
GGML_ASSERT(samples_count > 0);
|
GGML_ASSERT(samples_count > 0);
|
||||||
GGML_ASSERT(tokens_input->n_dims == 2);
|
GGML_ASSERT(ggml_is_matrix(tokens_input));
|
||||||
GGML_ASSERT(target_probs->n_dims == 3);
|
GGML_ASSERT(ggml_is_3d(target_probs));
|
||||||
int64_t n_vocab = target_probs->ne[0];
|
int64_t n_vocab = target_probs->ne[0];
|
||||||
int64_t n_tokens = tokens_input->ne[0];
|
int64_t n_tokens = tokens_input->ne[0];
|
||||||
int64_t n_batch = tokens_input->ne[1];
|
int64_t n_batch = tokens_input->ne[1];
|
||||||
|
@ -1258,9 +1258,9 @@ static struct ggml_tensor * forward_lora(
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) {
|
static void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) {
|
||||||
assert(logits->n_dims == 2);
|
assert(ggml_is_matrix(logits));
|
||||||
assert(probs->n_dims == 2);
|
assert(ggml_is_matrix(probs));
|
||||||
assert(best_samples->n_dims == 1);
|
assert(ggml_is_vector(best_samples));
|
||||||
assert(logits->ne[1] == best_samples->ne[0]);
|
assert(logits->ne[1] == best_samples->ne[0]);
|
||||||
assert(logits->ne[0] == probs->ne[0]);
|
assert(logits->ne[0] == probs->ne[0]);
|
||||||
assert(logits->ne[1] == probs->ne[1]);
|
assert(logits->ne[1] == probs->ne[1]);
|
||||||
@ -1292,9 +1292,9 @@ static void sample_softmax_batch(
|
|||||||
struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs,
|
struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs,
|
||||||
struct ggml_tensor * best_samples
|
struct ggml_tensor * best_samples
|
||||||
) {
|
) {
|
||||||
GGML_ASSERT(best_samples->n_dims == 2);
|
GGML_ASSERT(ggml_is_matrix(best_samples));
|
||||||
GGML_ASSERT(logits->n_dims == 3);
|
GGML_ASSERT(ggml_is_3d(logits));
|
||||||
GGML_ASSERT(probs->n_dims == 3);
|
GGML_ASSERT(ggml_is_3d(probs));
|
||||||
int n_tokens = best_samples->ne[0];
|
int n_tokens = best_samples->ne[0];
|
||||||
int n_batch = best_samples->ne[1];
|
int n_batch = best_samples->ne[1];
|
||||||
int n_vocab = logits->ne[0];
|
int n_vocab = logits->ne[0];
|
||||||
@ -1334,7 +1334,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void print_matrix(struct ggml_tensor * probs) {
|
static void print_matrix(struct ggml_tensor * probs) {
|
||||||
assert(probs->n_dims == 2);
|
assert(ggml_is_matrix(probs));
|
||||||
for (int i = 0; i < probs->ne[1]; ++i) {
|
for (int i = 0; i < probs->ne[1]; ++i) {
|
||||||
for (int k = 0; k < probs->ne[0]; ++k) {
|
for (int k = 0; k < probs->ne[0]; ++k) {
|
||||||
float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
|
float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
|
||||||
@ -1386,8 +1386,8 @@ static void get_example_targets(int example_id, struct ggml_tensor * tokens_inpu
|
|||||||
static void get_example_targets_batch(
|
static void get_example_targets_batch(
|
||||||
struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets
|
struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets
|
||||||
) {
|
) {
|
||||||
GGML_ASSERT(tokens_input->n_dims == 2);
|
GGML_ASSERT(ggml_is_matrix(tokens_input));
|
||||||
GGML_ASSERT( targets->n_dims == 3);
|
GGML_ASSERT(ggml_is_3d(targets));
|
||||||
int n_tokens = tokens_input->ne[0];
|
int n_tokens = tokens_input->ne[0];
|
||||||
int n_batch = tokens_input->ne[1];
|
int n_batch = tokens_input->ne[1];
|
||||||
GGML_ASSERT(n_tokens == targets->ne[1]);
|
GGML_ASSERT(n_tokens == targets->ne[1]);
|
||||||
|
@ -427,7 +427,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void print_matrix(struct ggml_tensor * probs) {
|
static void print_matrix(struct ggml_tensor * probs) {
|
||||||
assert(probs->n_dims == 2);
|
assert(ggml_is_matrix(probs));
|
||||||
for (int i = 0; i < probs->ne[1]; ++i) {
|
for (int i = 0; i < probs->ne[1]; ++i) {
|
||||||
for (int k = 0; k < probs->ne[0]; ++k) {
|
for (int k = 0; k < probs->ne[0]; ++k) {
|
||||||
float p = get_f32_2d(probs, k, i);
|
float p = get_f32_2d(probs, k, i);
|
||||||
@ -639,7 +639,7 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab
|
|||||||
|
|
||||||
static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) {
|
static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) {
|
||||||
int ct;
|
int ct;
|
||||||
switch (gg_weights->n_dims){
|
switch (ggml_n_dims(gg_weights)) {
|
||||||
case 1:
|
case 1:
|
||||||
ct = 0;
|
ct = 0;
|
||||||
for (int i0 = 0; i0 < gg_weights->ne[0]; i0++){
|
for (int i0 = 0; i0 < gg_weights->ne[0]; i0++){
|
||||||
|
@ -1110,7 +1110,7 @@ static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor,
|
|||||||
name = ggml_get_name(tensor);
|
name = ggml_get_name(tensor);
|
||||||
}
|
}
|
||||||
uint32_t name_len = strlen(name);
|
uint32_t name_len = strlen(name);
|
||||||
uint32_t nd = tensor->n_dims;
|
uint32_t nd = ggml_n_dims(tensor);
|
||||||
uint32_t ne[4] = { (uint32_t)tensor->ne[0],
|
uint32_t ne[4] = { (uint32_t)tensor->ne[0],
|
||||||
(uint32_t)tensor->ne[1],
|
(uint32_t)tensor->ne[1],
|
||||||
(uint32_t)tensor->ne[2],
|
(uint32_t)tensor->ne[2],
|
||||||
|
@ -195,7 +195,7 @@ static bool gguf_ex_read_1(const std::string & fname) {
|
|||||||
|
|
||||||
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
||||||
|
|
||||||
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, cur->n_dims, cur->name, cur->data);
|
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
|
||||||
|
|
||||||
// print first 10 elements
|
// print first 10 elements
|
||||||
const float * data = (const float *) cur->data;
|
const float * data = (const float *) cur->data;
|
||||||
|
@ -514,7 +514,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
|||||||
ctx_size += padded_size;
|
ctx_size += padded_size;
|
||||||
if (verbosity >= 3) {
|
if (verbosity >= 3) {
|
||||||
printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, padded_size=%zu, offset=%zu\n", __func__, i,
|
printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, padded_size=%zu, offset=%zu\n", __func__, i,
|
||||||
cur->n_dims, cur->name, tensor_size, padded_size, offset);
|
ggml_n_dims(cur), cur->name, tensor_size, padded_size, offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -962,7 +962,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
// quantize only 2D tensors
|
// quantize only 2D tensors
|
||||||
quantize &= (cur->n_dims == 2);
|
quantize &= (ggml_n_dims(cur) == 2);
|
||||||
|
|
||||||
if (quantize) {
|
if (quantize) {
|
||||||
new_type = type;
|
new_type = type;
|
||||||
@ -1035,7 +1035,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
|||||||
fout.put(0);
|
fout.put(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), cur->n_dims, quantize,
|
printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), ggml_n_dims(cur), quantize,
|
||||||
orig_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
|
orig_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
94
ggml.c
94
ggml.c
@ -2054,24 +2054,37 @@ size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
|||||||
return ggml_type_size(tensor->type);
|
return ggml_type_size(tensor->type);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool ggml_is_scalar(const struct ggml_tensor * tensor) {
|
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
|
||||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||||
|
|
||||||
return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
|
return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool ggml_is_vector(const struct ggml_tensor * tensor) {
|
bool ggml_is_vector(const struct ggml_tensor * tensor) {
|
||||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||||
|
|
||||||
return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
|
return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool ggml_is_matrix(const struct ggml_tensor * tensor) {
|
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
|
||||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||||
|
|
||||||
return tensor->ne[2] == 1 && tensor->ne[3] == 1;
|
return tensor->ne[2] == 1 && tensor->ne[3] == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ggml_is_3d(const struct ggml_tensor * tensor) {
|
||||||
|
return tensor->ne[3] == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ggml_n_dims(const struct ggml_tensor * tensor) {
|
||||||
|
for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
|
||||||
|
if (tensor->ne[i] > 1) {
|
||||||
|
return i + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
||||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||||
|
|
||||||
@ -2521,7 +2534,6 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|||||||
/*.type =*/ type,
|
/*.type =*/ type,
|
||||||
/*.backend =*/ GGML_BACKEND_CPU,
|
/*.backend =*/ GGML_BACKEND_CPU,
|
||||||
/*.buffer =*/ NULL,
|
/*.buffer =*/ NULL,
|
||||||
/*.n_dims =*/ n_dims,
|
|
||||||
/*.ne =*/ { 1, 1, 1, 1 },
|
/*.ne =*/ { 1, 1, 1, 1 },
|
||||||
/*.nb =*/ { 0, 0, 0, 0 },
|
/*.nb =*/ { 0, 0, 0, 0 },
|
||||||
/*.op =*/ GGML_OP_NONE,
|
/*.op =*/ GGML_OP_NONE,
|
||||||
@ -2628,7 +2640,7 @@ struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
|
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
|
||||||
return ggml_new_tensor(ctx, src->type, src->n_dims, src->ne);
|
return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
|
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
|
||||||
@ -3077,7 +3089,7 @@ struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char *
|
|||||||
struct ggml_tensor * ggml_view_tensor(
|
struct ggml_tensor * ggml_view_tensor(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * src) {
|
struct ggml_tensor * src) {
|
||||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src, 0);
|
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
|
||||||
ggml_format_name(result, "%s (view)", src->name);
|
ggml_format_name(result, "%s (view)", src->name);
|
||||||
|
|
||||||
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
||||||
@ -3235,10 +3247,10 @@ static struct ggml_tensor * ggml_add_cast_impl(
|
|||||||
is_node = true;
|
is_node = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, type, a->n_dims, a->ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
|
||||||
|
|
||||||
result->op = GGML_OP_ADD;
|
result->op = GGML_OP_ADD;
|
||||||
result->grad = is_node ? ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne) : NULL;
|
result->grad = is_node ? ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, a->ne) : NULL;
|
||||||
result->src[0] = a;
|
result->src[0] = a;
|
||||||
result->src[1] = b;
|
result->src[1] = b;
|
||||||
|
|
||||||
@ -3607,12 +3619,12 @@ struct ggml_tensor * ggml_sum_rows(
|
|||||||
is_node = true;
|
is_node = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t ne[4] = {1,1,1,1};
|
int64_t ne[GGML_MAX_DIMS] = { 1 };
|
||||||
for (int i=1; i<a->n_dims; ++i) {
|
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
|
||||||
ne[i] = a->ne[i];
|
ne[i] = a->ne[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, a->n_dims, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
|
||||||
|
|
||||||
result->op = GGML_OP_SUM_ROWS;
|
result->op = GGML_OP_SUM_ROWS;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -3633,8 +3645,8 @@ struct ggml_tensor * ggml_mean(
|
|||||||
is_node = true;
|
is_node = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t ne[GGML_MAX_DIMS] = { 1, a->ne[1], a->ne[2], a->ne[3] };
|
int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||||
|
|
||||||
result->op = GGML_OP_MEAN;
|
result->op = GGML_OP_MEAN;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -3656,8 +3668,7 @@ struct ggml_tensor * ggml_argmax(
|
|||||||
is_node = true;
|
is_node = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t ne[GGML_MAX_DIMS] = { a->ne[1], 1, 1, 1 };
|
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, ne);
|
|
||||||
|
|
||||||
result->op = GGML_OP_ARGMAX;
|
result->op = GGML_OP_ARGMAX;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -3680,7 +3691,7 @@ struct ggml_tensor * ggml_repeat(
|
|||||||
is_node = true;
|
is_node = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
|
||||||
|
|
||||||
result->op = GGML_OP_REPEAT;
|
result->op = GGML_OP_REPEAT;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -3707,7 +3718,7 @@ struct ggml_tensor * ggml_repeat_back(
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
|
||||||
|
|
||||||
result->op = GGML_OP_REPEAT_BACK;
|
result->op = GGML_OP_REPEAT_BACK;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -4083,7 +4094,7 @@ struct ggml_tensor * ggml_mul_mat(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
|
const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(a->n_dims, b->n_dims), ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||||
|
|
||||||
result->op = GGML_OP_MUL_MAT;
|
result->op = GGML_OP_MUL_MAT;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -4117,7 +4128,7 @@ struct ggml_tensor * ggml_mul_mat_id(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int64_t ne[4] = { as[0]->ne[1], b->ne[1], b->ne[2], b->ne[3] };
|
const int64_t ne[4] = { as[0]->ne[1], b->ne[1], b->ne[2], b->ne[3] };
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(as[0]->n_dims, b->n_dims), ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||||
|
|
||||||
ggml_set_op_params_i32(result, 0, id);
|
ggml_set_op_params_i32(result, 0, id);
|
||||||
ggml_set_op_params_i32(result, 1, n_as);
|
ggml_set_op_params_i32(result, 1, n_as);
|
||||||
@ -4155,7 +4166,7 @@ struct ggml_tensor * ggml_out_prod(
|
|||||||
|
|
||||||
// a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
|
// a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
|
||||||
const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
|
const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(a->n_dims, b->n_dims), ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||||
|
|
||||||
result->op = GGML_OP_OUT_PROD;
|
result->op = GGML_OP_OUT_PROD;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -4440,7 +4451,7 @@ struct ggml_tensor * ggml_reshape(
|
|||||||
//GGML_ASSERT(false);
|
//GGML_ASSERT(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a, 0);
|
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
|
||||||
ggml_format_name(result, "%s (reshaped)", a->name);
|
ggml_format_name(result, "%s (reshaped)", a->name);
|
||||||
|
|
||||||
result->op = GGML_OP_RESHAPE;
|
result->op = GGML_OP_RESHAPE;
|
||||||
@ -4818,7 +4829,7 @@ struct ggml_tensor * ggml_diag(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
|
const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, MAX(a->n_dims, 2), ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
|
||||||
|
|
||||||
result->op = GGML_OP_DIAG;
|
result->op = GGML_OP_DIAG;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -5465,7 +5476,7 @@ struct ggml_tensor * ggml_pool_1d(
|
|||||||
is_node = true;
|
is_node = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int64_t ne[3] = {
|
const int64_t ne[2] = {
|
||||||
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
||||||
a->ne[1],
|
a->ne[1],
|
||||||
};
|
};
|
||||||
@ -5584,7 +5595,7 @@ struct ggml_tensor * ggml_argsort(
|
|||||||
enum ggml_sort_order order) {
|
enum ggml_sort_order order) {
|
||||||
bool is_node = false;
|
bool is_node = false;
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, a->ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
|
||||||
|
|
||||||
ggml_set_op_params_i32(result, 0, (int32_t) order);
|
ggml_set_op_params_i32(result, 0, (int32_t) order);
|
||||||
|
|
||||||
@ -5631,7 +5642,7 @@ struct ggml_tensor * ggml_flash_attn(
|
|||||||
}
|
}
|
||||||
|
|
||||||
//struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, q->ne);
|
||||||
|
|
||||||
int32_t t = masked ? 1 : 0;
|
int32_t t = masked ? 1 : 0;
|
||||||
ggml_set_op_params(result, &t, sizeof(t));
|
ggml_set_op_params(result, &t, sizeof(t));
|
||||||
@ -5664,7 +5675,7 @@ struct ggml_tensor * ggml_flash_ff(
|
|||||||
}
|
}
|
||||||
|
|
||||||
//struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, a->ne);
|
||||||
|
|
||||||
result->op = GGML_OP_FLASH_FF;
|
result->op = GGML_OP_FLASH_FF;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -5780,7 +5791,6 @@ struct ggml_tensor * ggml_win_part(
|
|||||||
const int np = npx*npy;
|
const int np = npx*npy;
|
||||||
|
|
||||||
const int64_t ne[4] = { a->ne[0], w, w, np, };
|
const int64_t ne[4] = { a->ne[0], w, w, np, };
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||||
|
|
||||||
int32_t params[] = { npx, npy, w };
|
int32_t params[] = { npx, npy, w };
|
||||||
@ -14563,7 +14573,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
|
|||||||
return replacements->vals[i];
|
return replacements->vals[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, node->n_dims, node->ne);
|
struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, GGML_MAX_DIMS, node->ne);
|
||||||
|
|
||||||
// insert clone into replacements
|
// insert clone into replacements
|
||||||
GGML_ASSERT(replacements->set.keys[i] == NULL); // assert that we don't overwrite
|
GGML_ASSERT(replacements->set.keys[i] == NULL); // assert that we don't overwrite
|
||||||
@ -16564,7 +16574,7 @@ static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fou
|
|||||||
fprintf(fout, "%-6s %-12s %8d %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %16zu %16zu %16zu %16zu %16p %32s\n",
|
fprintf(fout, "%-6s %-12s %8d %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %16zu %16zu %16zu %16zu %16p %32s\n",
|
||||||
ggml_type_name(tensor->type),
|
ggml_type_name(tensor->type),
|
||||||
ggml_op_name (tensor->op),
|
ggml_op_name (tensor->op),
|
||||||
tensor->n_dims,
|
ggml_n_dims(tensor),
|
||||||
ne[0], ne[1], ne[2], ne[3],
|
ne[0], ne[1], ne[2], ne[3],
|
||||||
nb[0], nb[1], nb[2], nb[3],
|
nb[0], nb[1], nb[2], nb[3],
|
||||||
tensor->data,
|
tensor->data,
|
||||||
@ -16579,7 +16589,7 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
|
|||||||
arg,
|
arg,
|
||||||
ggml_type_name(tensor->type),
|
ggml_type_name(tensor->type),
|
||||||
ggml_op_name (tensor->op),
|
ggml_op_name (tensor->op),
|
||||||
tensor->n_dims,
|
ggml_n_dims(tensor),
|
||||||
ne[0], ne[1], ne[2], ne[3],
|
ne[0], ne[1], ne[2], ne[3],
|
||||||
nb[0], nb[1], nb[2], nb[3],
|
nb[0], nb[1], nb[2], nb[3],
|
||||||
tensor->data,
|
tensor->data,
|
||||||
@ -16669,11 +16679,9 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|||||||
|
|
||||||
const uint32_t type = tensor->type;
|
const uint32_t type = tensor->type;
|
||||||
const uint32_t op = tensor->op;
|
const uint32_t op = tensor->op;
|
||||||
const uint32_t n_dims = tensor->n_dims;
|
|
||||||
|
|
||||||
fwrite(&type, sizeof(uint32_t), 1, fout);
|
fwrite(&type, sizeof(uint32_t), 1, fout);
|
||||||
fwrite(&op, sizeof(uint32_t), 1, fout);
|
fwrite(&op, sizeof(uint32_t), 1, fout);
|
||||||
fwrite(&n_dims, sizeof(uint32_t), 1, fout);
|
|
||||||
|
|
||||||
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
||||||
const uint64_t ne = tensor->ne[j];
|
const uint64_t ne = tensor->ne[j];
|
||||||
@ -16703,11 +16711,9 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|||||||
|
|
||||||
const uint32_t type = tensor->type;
|
const uint32_t type = tensor->type;
|
||||||
const uint32_t op = tensor->op;
|
const uint32_t op = tensor->op;
|
||||||
const uint32_t n_dims = tensor->n_dims;
|
|
||||||
|
|
||||||
fwrite(&type, sizeof(uint32_t), 1, fout);
|
fwrite(&type, sizeof(uint32_t), 1, fout);
|
||||||
fwrite(&op, sizeof(uint32_t), 1, fout);
|
fwrite(&op, sizeof(uint32_t), 1, fout);
|
||||||
fwrite(&n_dims, sizeof(uint32_t), 1, fout);
|
|
||||||
|
|
||||||
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
||||||
const uint64_t ne = tensor->ne[j];
|
const uint64_t ne = tensor->ne[j];
|
||||||
@ -16879,12 +16885,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|||||||
{
|
{
|
||||||
uint32_t type;
|
uint32_t type;
|
||||||
uint32_t op;
|
uint32_t op;
|
||||||
uint32_t n_dims;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < n_leafs; ++i) {
|
for (uint32_t i = 0; i < n_leafs; ++i) {
|
||||||
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
||||||
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
||||||
n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
|
|
||||||
|
|
||||||
int64_t ne[GGML_MAX_DIMS];
|
int64_t ne[GGML_MAX_DIMS];
|
||||||
size_t nb[GGML_MAX_DIMS];
|
size_t nb[GGML_MAX_DIMS];
|
||||||
@ -16900,7 +16904,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|||||||
nb[j] = nb_cur;
|
nb[j] = nb_cur;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
|
struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
|
||||||
|
|
||||||
tensor->op = (enum ggml_op) op;
|
tensor->op = (enum ggml_op) op;
|
||||||
|
|
||||||
@ -16917,7 +16921,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|||||||
|
|
||||||
ptr += ggml_nbytes(tensor);
|
ptr += ggml_nbytes(tensor);
|
||||||
|
|
||||||
fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
fprintf(stderr, "%s: loaded leaf %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -16927,12 +16931,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|||||||
{
|
{
|
||||||
uint32_t type;
|
uint32_t type;
|
||||||
uint32_t op;
|
uint32_t op;
|
||||||
uint32_t n_dims;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < n_nodes; ++i) {
|
for (uint32_t i = 0; i < n_nodes; ++i) {
|
||||||
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
type = *(const uint32_t *) ptr; ptr += sizeof(type);
|
||||||
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
op = *(const uint32_t *) ptr; ptr += sizeof(op);
|
||||||
n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
|
|
||||||
|
|
||||||
enum ggml_op eop = (enum ggml_op) op;
|
enum ggml_op eop = (enum ggml_op) op;
|
||||||
|
|
||||||
@ -17003,7 +17005,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
|
tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
|
||||||
|
|
||||||
tensor->op = eop;
|
tensor->op = eop;
|
||||||
} break;
|
} break;
|
||||||
@ -17022,7 +17024,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|||||||
|
|
||||||
result->nodes[i] = tensor;
|
result->nodes[i] = tensor;
|
||||||
|
|
||||||
fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
fprintf(stderr, "%s: loaded node %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -17160,7 +17162,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
|
|||||||
fprintf(fp, "(%s)|", ggml_type_name(node->type));
|
fprintf(fp, "(%s)|", ggml_type_name(node->type));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (node->n_dims == 2) {
|
if (ggml_is_matrix(node)) {
|
||||||
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
|
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
|
||||||
} else {
|
} else {
|
||||||
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
|
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
|
||||||
@ -17427,7 +17429,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|||||||
int64_t i = 0;
|
int64_t i = 0;
|
||||||
for (int p = 0; p < np; ++p) {
|
for (int p = 0; p < np; ++p) {
|
||||||
const int64_t ne = ggml_nelements(ps[p]);
|
const int64_t ne = ggml_nelements(ps[p]);
|
||||||
const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
|
const float p_decay = ((ggml_n_dims(ps[p]) >= decay_min_ndim) ? decay : 0.0f) * sched;
|
||||||
for (int64_t j = 0; j < ne; ++j) {
|
for (int64_t j = 0; j < ne; ++j) {
|
||||||
float x = ggml_get_f32_1d(ps[p], j);
|
float x = ggml_get_f32_1d(ps[p], j);
|
||||||
float g_ = g[i]*gnorm;
|
float g_ = g[i]*gnorm;
|
||||||
@ -19205,8 +19207,8 @@ void gguf_add_tensor(
|
|||||||
ctx->infos[idx].ne[i] = 1;
|
ctx->infos[idx].ne[i] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->infos[idx].n_dims = tensor->n_dims;
|
ctx->infos[idx].n_dims = ggml_n_dims(tensor);
|
||||||
for (int i = 0; i < tensor->n_dims; i++) {
|
for (uint32_t i = 0; i < ctx->infos[idx].n_dims; i++) {
|
||||||
ctx->infos[idx].ne[i] = tensor->ne[i];
|
ctx->infos[idx].ne[i] = tensor->ne[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
8
ggml.h
8
ggml.h
@ -502,7 +502,6 @@ extern "C" {
|
|||||||
|
|
||||||
struct ggml_backend_buffer * buffer;
|
struct ggml_backend_buffer * buffer;
|
||||||
|
|
||||||
int n_dims;
|
|
||||||
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
||||||
size_t nb[GGML_MAX_DIMS]; // stride in bytes:
|
size_t nb[GGML_MAX_DIMS]; // stride in bytes:
|
||||||
// nb[0] = ggml_type_size(type)
|
// nb[0] = ggml_type_size(type)
|
||||||
@ -534,7 +533,7 @@ extern "C" {
|
|||||||
|
|
||||||
void * extra; // extra things e.g. for ggml-cuda.cu
|
void * extra; // extra things e.g. for ggml-cuda.cu
|
||||||
|
|
||||||
char padding[12];
|
char padding[8];
|
||||||
};
|
};
|
||||||
|
|
||||||
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
||||||
@ -666,6 +665,11 @@ extern "C" {
|
|||||||
GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
||||||
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
||||||
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
||||||
|
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
|
||||||
|
GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
|
||||||
|
GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
|
||||||
|
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
|
||||||
|
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
|
||||||
|
|
||||||
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
||||||
|
|
||||||
|
@ -8471,7 +8471,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||||||
bool quantize = name.rfind("weight") == name.size() - 6; // ends with 'weight'?
|
bool quantize = name.rfind("weight") == name.size() - 6; // ends with 'weight'?
|
||||||
|
|
||||||
// quantize only 2D tensors
|
// quantize only 2D tensors
|
||||||
quantize &= (tensor->n_dims == 2);
|
quantize &= (ggml_n_dims(tensor) == 2);
|
||||||
quantize &= params->quantize_output_tensor || name != "output.weight";
|
quantize &= params->quantize_output_tensor || name != "output.weight";
|
||||||
quantize &= !params->only_copy;
|
quantize &= !params->only_copy;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user