mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 03:12:23 +01:00
ggml : add ggml_pool_1d and ggml_pool_2d
This commit is contained in:
parent
680e6f9177
commit
4523d10d0c
283
ggml.c
283
ggml.c
@ -3787,6 +3787,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
||||
"CLAMP",
|
||||
"CONV_1D",
|
||||
"CONV_2D",
|
||||
"POOL_1D",
|
||||
"POOL_2D",
|
||||
|
||||
"FLASH_ATTN",
|
||||
"FLASH_FF",
|
||||
@ -3805,7 +3807,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
||||
"CROSS_ENTROPY_LOSS_BACK",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 66, "GGML_OP_COUNT != 66");
|
||||
static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
|
||||
|
||||
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"none",
|
||||
@ -3865,6 +3867,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"clamp(x)",
|
||||
"conv_1d(x)",
|
||||
"conv_2d(x)",
|
||||
"pool_1d(x)",
|
||||
"pool_2d(x)",
|
||||
|
||||
"flash_attn(x)",
|
||||
"flash_ff(x)",
|
||||
@ -3883,7 +3887,9 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"cross_entropy_loss_back(x,y)",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 66, "GGML_OP_COUNT != 66");
|
||||
static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
|
||||
|
||||
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
||||
|
||||
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
|
||||
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
|
||||
@ -7214,6 +7220,98 @@ struct ggml_tensor* ggml_conv_1d_ph(
|
||||
return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
|
||||
}
|
||||
|
||||
|
||||
// ggml_pool_*
|
||||
|
||||
// Number of output elements produced along one axis by a pooling window.
//   ins - input length along the axis
//   ks  - kernel (window) size
//   s   - stride, must be non-zero since it is used as a divisor
//   p   - padding, counted once for each end of the axis
// The division is an integer division, so a trailing partial window is dropped.
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
    const int64_t padded_len = ins + 2 * p;
    const int64_t n_steps    = (padded_len - ks) / s;

    return n_steps + 1;
}
|
||||
|
||||
// ggml_pool_1d
|
||||
|
||||
struct ggml_tensor* ggml_pool_1d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_op_pool op,
|
||||
int k0,
|
||||
int s0,
|
||||
int p0) {
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
if (a->grad) {
|
||||
GGML_ASSERT(false); // TODO: implement backward
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
const int64_t ne[3] = {
|
||||
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
||||
a->ne[1],
|
||||
};
|
||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
||||
|
||||
ggml_scratch_save(ctx);
|
||||
struct ggml_tensor* c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
|
||||
((int32_t*)c->data)[0] = op;
|
||||
((int32_t*)c->data)[1] = k0;
|
||||
((int32_t*)c->data)[2] = s0;
|
||||
((int32_t*)c->data)[3] = p0;
|
||||
ggml_scratch_load(ctx);
|
||||
|
||||
result->op = GGML_OP_POOL_1D;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
result->src[1] = c;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ggml_pool_2d
|
||||
|
||||
struct ggml_tensor* ggml_pool_2d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_op_pool op,
|
||||
int k0,
|
||||
int k1,
|
||||
int s0,
|
||||
int s1,
|
||||
int p0,
|
||||
int p1) {
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
if (a->grad) {
|
||||
GGML_ASSERT(false); // TODO: implement backward
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
const int64_t ne[3] = {
|
||||
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
||||
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
|
||||
a->ne[2],
|
||||
};
|
||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
||||
|
||||
ggml_scratch_save(ctx);
|
||||
struct ggml_tensor* c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 7);
|
||||
((int32_t*)c->data)[0] = op;
|
||||
((int32_t*)c->data)[1] = k0;
|
||||
((int32_t*)c->data)[2] = k1;
|
||||
((int32_t*)c->data)[3] = s0;
|
||||
((int32_t*)c->data)[4] = s1;
|
||||
((int32_t*)c->data)[5] = p0;
|
||||
((int32_t*)c->data)[6] = p1;
|
||||
ggml_scratch_load(ctx);
|
||||
|
||||
result->op = GGML_OP_POOL_2D;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
result->src[1] = c;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ggml_flash_attn
|
||||
|
||||
struct ggml_tensor * ggml_flash_attn(
|
||||
@ -13013,6 +13111,166 @@ static void ggml_compute_forward_conv_2d(
|
||||
};
|
||||
}
|
||||
|
||||
// ggml_compute_forward_pool_1d_sk_p0
|
||||
|
||||
static void ggml_compute_forward_pool_1d_sk_p0(
|
||||
const struct ggml_compute_params * params,
|
||||
const enum ggml_op_pool op,
|
||||
const struct ggml_tensor * src,
|
||||
const int k,
|
||||
struct ggml_tensor * dst) {
|
||||
assert(src->type == GGML_TYPE_F32);
|
||||
assert(params->ith == 0);
|
||||
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
const char * cdata = (const char *)src->data;
|
||||
const char * const data_end = cdata + ggml_nbytes(src);
|
||||
float * drow = (float *)dst->data;
|
||||
|
||||
const int64_t rs = dst->ne[0];
|
||||
|
||||
while (cdata < data_end) {
|
||||
const float * const srow = (const float *)cdata;
|
||||
|
||||
int j = 0;
|
||||
|
||||
for (int64_t i = 0; i < rs; ++i) {
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: drow[i] = 0; break;
|
||||
case GGML_OP_POOL_MAX: drow[i] = -FLT_MAX; break;
|
||||
case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
|
||||
}
|
||||
for (int ki = 0; ki < k; ++ki) {
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: drow[i] += srow[j]; break;
|
||||
case GGML_OP_POOL_MAX: if (srow[j] > drow[i]) drow[i] = srow[j]; break;
|
||||
case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
|
||||
}
|
||||
++j;
|
||||
}
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: drow[i] /= k; break;
|
||||
case GGML_OP_POOL_MAX: break;
|
||||
case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
|
||||
}
|
||||
}
|
||||
|
||||
cdata += src->nb[1];
|
||||
drow += rs;
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_pool_1d
|
||||
|
||||
static void ggml_compute_forward_pool_1d(
|
||||
const struct ggml_compute_params* params,
|
||||
const struct ggml_tensor* src0,
|
||||
const struct ggml_tensor* opt0,
|
||||
struct ggml_tensor* dst) {
|
||||
GGML_ASSERT(opt0->ne[0] == 4);
|
||||
const int* opts = (const int*)opt0->data;
|
||||
enum ggml_op_pool op = opts[0];
|
||||
const int k0 = opts[1];
|
||||
const int s0 = opts[2];
|
||||
const int p0 = opts[3];
|
||||
GGML_ASSERT(p0 == 0); // padding not supported
|
||||
GGML_ASSERT(k0 == s0); // only s = k supported
|
||||
|
||||
ggml_compute_forward_pool_1d_sk_p0(params, op, src0, k0, dst);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_pool_2d_sk_p0
|
||||
|
||||
static void ggml_compute_forward_pool_2d_sk_p0(
|
||||
const struct ggml_compute_params * params,
|
||||
const enum ggml_op_pool op,
|
||||
const struct ggml_tensor * src,
|
||||
const int k0,
|
||||
const int k1,
|
||||
struct ggml_tensor * dst) {
|
||||
assert(src->type == GGML_TYPE_F32);
|
||||
assert(params->ith == 0);
|
||||
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
const char * cdata = (const char*)src->data;
|
||||
const char * const data_end = cdata + ggml_nbytes(src);
|
||||
|
||||
const int64_t px = dst->ne[0];
|
||||
const int64_t py = dst->ne[1];
|
||||
const int64_t pa = px * py;
|
||||
|
||||
float * dplane = (float *)dst->data;
|
||||
|
||||
const int ka = k0 * k1;
|
||||
|
||||
while (cdata < data_end) {
|
||||
for (int oy = 0; oy < py; ++oy) {
|
||||
float * const drow = dplane + oy * px;
|
||||
for (int ox = 0; ox < px; ++ox) {
|
||||
float * const out = drow + ox;
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: *out = 0; break;
|
||||
case GGML_OP_POOL_MAX: *out = -FLT_MAX; break;
|
||||
case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
|
||||
}
|
||||
|
||||
const int ix = ox * k0;
|
||||
const int iy = oy * k1;
|
||||
|
||||
for (int ky = 0; ky < k1; ++ky) {
|
||||
const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
|
||||
for (int kx = 0; kx < k0; ++kx) {
|
||||
int j = ix + kx;
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: *out += srow[j]; break;
|
||||
case GGML_OP_POOL_MAX: if (srow[j] > *out) *out = srow[j]; break;
|
||||
case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: *out /= ka; break;
|
||||
case GGML_OP_POOL_MAX: break;
|
||||
case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cdata += src->nb[2];
|
||||
dplane += pa;
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_pool_2d
|
||||
|
||||
static void ggml_compute_forward_pool_2d(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * src0,
|
||||
const struct ggml_tensor * opt0,
|
||||
struct ggml_tensor * dst) {
|
||||
GGML_ASSERT(opt0->ne[0] == 7);
|
||||
const int* opts = (const int*)opt0->data;
|
||||
enum ggml_op_pool op = opts[0];
|
||||
const int k0 = opts[1];
|
||||
const int k1 = opts[2];
|
||||
const int s0 = opts[3];
|
||||
const int s1 = opts[4];
|
||||
const int p0 = opts[5];
|
||||
const int p1 = opts[6];
|
||||
GGML_ASSERT(p0 == 0);
|
||||
GGML_ASSERT(p1 == 0); // padding not supported
|
||||
GGML_ASSERT(k0 == s0);
|
||||
GGML_ASSERT(k1 == s1); // only s = k supported
|
||||
|
||||
ggml_compute_forward_pool_2d_sk_p0(params, op, src0, k0, k1, dst);
|
||||
}
|
||||
|
||||
|
||||
// ggml_compute_forward_flash_attn
|
||||
|
||||
@ -14794,6 +15052,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
||||
{
|
||||
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
|
||||
} break;
|
||||
case GGML_OP_POOL_1D:
|
||||
{
|
||||
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor->src[1], tensor);
|
||||
} break;
|
||||
case GGML_OP_POOL_2D:
|
||||
{
|
||||
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor->src[1], tensor);
|
||||
} break;
|
||||
case GGML_OP_FLASH_ATTN:
|
||||
{
|
||||
const int32_t t = ggml_get_i32_1d(tensor->src[3], 0);
|
||||
@ -15494,6 +15760,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
||||
{
|
||||
GGML_ASSERT(false); // TODO: not implemented
|
||||
} break;
|
||||
case GGML_OP_POOL_1D:
|
||||
{
|
||||
GGML_ASSERT(false); // TODO: not implemented
|
||||
} break;
|
||||
case GGML_OP_POOL_2D:
|
||||
{
|
||||
GGML_ASSERT(false); // TODO: not implemented
|
||||
} break;
|
||||
case GGML_OP_FLASH_ATTN:
|
||||
{
|
||||
struct ggml_tensor * flash_grad = NULL;
|
||||
@ -16315,6 +16589,11 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
||||
|
||||
work_size = MAX(work_size, cur);
|
||||
} break;
|
||||
case GGML_OP_POOL_1D:
|
||||
case GGML_OP_POOL_2D:
|
||||
{
|
||||
n_tasks = 1;
|
||||
} break;
|
||||
case GGML_OP_FLASH_ATTN:
|
||||
{
|
||||
n_tasks = n_threads;
|
||||
|
27
ggml.h
27
ggml.h
@ -368,6 +368,8 @@ extern "C" {
|
||||
GGML_OP_CLAMP,
|
||||
GGML_OP_CONV_1D,
|
||||
GGML_OP_CONV_2D,
|
||||
GGML_OP_POOL_1D,
|
||||
GGML_OP_POOL_2D,
|
||||
|
||||
GGML_OP_FLASH_ATTN,
|
||||
GGML_OP_FLASH_FF,
|
||||
@ -1173,6 +1175,31 @@ extern "C" {
|
||||
int s,
|
||||
int d);
|
||||
|
||||
enum ggml_op_pool {
|
||||
GGML_OP_POOL_MAX,
|
||||
GGML_OP_POOL_AVG,
|
||||
GGML_OP_POOL_COUNT,
|
||||
};
|
||||
|
||||
GGML_API struct ggml_tensor* ggml_pool_1d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_op_pool op,
|
||||
int k0, // kernel size
|
||||
int s0, // stride
|
||||
int p0); // padding
|
||||
|
||||
GGML_API struct ggml_tensor* ggml_pool_2d(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_op_pool op,
|
||||
int k0,
|
||||
int k1,
|
||||
int s0,
|
||||
int s1,
|
||||
int p0,
|
||||
int p1);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_flash_attn(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * q,
|
||||
|
Loading…
Reference in New Issue
Block a user