ggml : sync latest (SAM + SD operators, CUDA alibi) (#2709)
* ggml : sync latest (SAM + SD operators, CUDA alibi)
* ggml : fix tabs

parent 8e4364f2af · commit ef3f333d37
@@ -1868,10 +1868,10 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn_train(
     t12->grad = expand(gb, ggml_permute(ctx0, t15->grad, 0, 2, 3, 1)); assert_shape_4d(t12->grad, N, n_batch, n_embd/n_head, n_head);
     t11->grad = expand(gb, ggml_reshape_2d(ctx0, ggml_cont(ctx0, t12->grad), N*n_batch, n_embd)); assert_shape_2d(t11->grad, N*n_batch, n_embd);
     t10->grad = expand(gb, ggml_permute(ctx0, t14->grad, 0, 2, 1, 3)); assert_shape_4d(t10->grad, n_embd/n_head, n_head, N, n_batch);
-    t09->grad = expand(gb, ggml_rope_back(ctx0, t10->grad, n_past, n_rot, rope_mode, n_ctx)); assert_shape_4d(t09->grad, n_embd/n_head, n_head, N, n_batch);
+    t09->grad = expand(gb, ggml_rope_back(ctx0, t10->grad, n_past, n_rot, rope_mode, n_ctx, 10000.0f, 1.0f, 0.0f, false)); assert_shape_4d(t09->grad, n_embd/n_head, n_head, N, n_batch);
     t08->grad = expand(gb, ggml_reshape_2d(ctx0, t09->grad, n_embd, N*n_batch)); assert_shape_2d(t08->grad, n_embd, N*n_batch);
     t07->grad = expand(gb, ggml_permute(ctx0, t13->grad, 0, 2, 1, 3)); assert_shape_4d(t07->grad, n_embd/n_head, n_head, N, n_batch);
-    t06->grad = expand(gb, ggml_rope_back(ctx0, t07->grad, n_past, n_rot, rope_mode, n_ctx)); assert_shape_4d(t06->grad, n_embd/n_head, n_head, N, n_batch);
+    t06->grad = expand(gb, ggml_rope_back(ctx0, t07->grad, n_past, n_rot, rope_mode, n_ctx, 10000.0f, 1.0f, 0.0f, false)); assert_shape_4d(t06->grad, n_embd/n_head, n_head, N, n_batch);
     t05->grad = expand(gb, ggml_reshape_2d(ctx0, t06->grad, n_embd, N*n_batch)); assert_shape_2d(t05->grad, n_embd, N*n_batch);
     t04->grad = expand(gb, ggml_add_inplace(ctx0,
                 ggml_add_inplace(ctx0,
@@ -76,7 +76,7 @@ struct ggml_allocr {
 };
 
 #ifdef GGML_ALLOCATOR_DEBUG
-static void add_allocated_tensor(struct ggml_allocator * alloc, struct ggml_tensor * tensor) {
+static void add_allocated_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
     for (int i = 0; i < 1024; i++) {
         if (alloc->allocated_tensors[i] == NULL) {
             alloc->allocated_tensors[i] = tensor;
@@ -85,7 +85,7 @@ static void add_allocated_tensor(struct ggml_allocator * alloc, struct ggml_tens
     }
     GGML_ASSERT(!"out of allocated_tensors");
 }
-static void remove_allocated_tensor(struct ggml_allocator * alloc, struct ggml_tensor * tensor) {
+static void remove_allocated_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
     for (int i = 0; i < 1024; i++) {
         if (alloc->allocated_tensors[i] == tensor ||
             (alloc->allocated_tensors[i] != NULL && alloc->allocated_tensors[i]->data == tensor->data)) {
ggml-cuda.cu
@@ -259,6 +259,7 @@ static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_
 #define CUDA_CPY_BLOCK_SIZE 32
 #define CUDA_SCALE_BLOCK_SIZE 256
 #define CUDA_ROPE_BLOCK_SIZE 256
+#define CUDA_ALIBI_BLOCK_SIZE 32
 #define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32
 #define CUDA_QUANTIZE_BLOCK_SIZE 256
 #define CUDA_DEQUANTIZE_BLOCK_SIZE 256
@@ -3940,6 +3941,29 @@ static __global__ void rope_glm_f32(const float * x, float * dst, const int ncol
         dst[i + half_n_dims * 3] = x2*sin_block_theta + x3*cos_block_theta;
     }
 }
 
+static __global__ void alibi_f32(const float * x, float * dst, const int ncols, const int k_rows,
+                                 const int n_heads_log2_floor, const float m0, const float m1) {
+    const int col = blockDim.x*blockIdx.x + threadIdx.x;
+
+    if (col >= ncols) {
+        return;
+    }
+
+    const int row = blockDim.y*blockIdx.y + threadIdx.y;
+    const int i = row*ncols + col;
+
+    const int k = row/k_rows;
+
+    float m_k;
+    if (k < n_heads_log2_floor) {
+        m_k = powf(m0, k + 1);
+    } else {
+        m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
+    }
+
+    dst[i] = col * m_k + x[i];
+}
+
 static __global__ void diag_mask_inf_f32(const float * x, float * dst, const int ncols, const int rows_per_channel, const int n_past) {
     const int col = blockDim.x*blockIdx.x + threadIdx.x;
     const int row = blockDim.y*blockIdx.y + threadIdx.y;
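Note: the new alibi_f32 kernel assigns one thread per element. A minimal host-side C sketch of the same math may help read it (illustrative only, not part of the commit; the helper name alibi_ref is made up): each row belongs to head k = row/k_rows, each head gets a slope m_k from the m0/m1 geometric series, and the bias col*m_k is added to the input.

    #include <math.h>

    // illustrative CPU equivalent of the alibi_f32 CUDA kernel above
    static void alibi_ref(const float * x, float * dst, int ncols, int nrows,
                          int k_rows, int n_heads_log2_floor, float m0, float m1) {
        for (int row = 0; row < nrows; row++) {
            const int k = row/k_rows; // which attention head this row belongs to
            const float m_k = k < n_heads_log2_floor
                ? powf(m0, k + 1)                           // first 2^floor(log2(n_head)) heads
                : powf(m1, 2*(k - n_heads_log2_floor) + 1); // remaining heads (non power-of-two case)
            for (int col = 0; col < ncols; col++) {
                dst[row*ncols + col] = col*m_k + x[row*ncols + col]; // linear positional bias
            }
        }
    }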
@@ -4766,6 +4790,15 @@ static void rope_glm_f32_cuda(const float * x, float * dst, const int ncols, con
     rope_glm_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols, p, block_p, theta_scale);
 }
 
+static void alibi_f32_cuda(const float * x, float * dst, const int ncols, const int nrows,
+                           const int k_rows, const int n_heads_log2_floor, const float m0,
+                           const float m1, cudaStream_t stream) {
+    const dim3 block_dims(CUDA_ALIBI_BLOCK_SIZE, 1, 1);
+    const int num_blocks_x = (ncols + CUDA_ALIBI_BLOCK_SIZE - 1) / (CUDA_ALIBI_BLOCK_SIZE);
+    const dim3 block_nums(num_blocks_x, nrows, 1);
+    alibi_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols, k_rows, n_heads_log2_floor, m0, m1);
+}
+
 static void diag_mask_inf_f32_cuda(const float * x, float * dst, const int ncols_x, const int nrows_x, const int rows_per_channel, const int n_past, cudaStream_t stream) {
     const dim3 block_dims(CUDA_DIAG_MASK_INF_BLOCK_SIZE, 1, 1);
     const int block_num_x = (ncols_x + CUDA_DIAG_MASK_INF_BLOCK_SIZE - 1) / CUDA_DIAG_MASK_INF_BLOCK_SIZE;
@@ -5501,6 +5534,41 @@ inline void ggml_cuda_op_rope(
     (void) i1;
 }
 
+inline void ggml_cuda_op_alibi(
+    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
+    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1,
+    cudaStream_t & cudaStream_main){
+
+    GGML_ASSERT(src0_ddf_i != nullptr);
+    GGML_ASSERT(dst_ddf_i != nullptr);
+
+    const int64_t ne00 = src0->ne[0];
+    const int64_t ne01 = src0->ne[1];
+    const int64_t ne02 = src0->ne[2];
+    const int64_t i01_diff = i01_high - i01_low;
+
+    const int n_past = ((int32_t *) dst->op_params)[0];
+    const int n_head = ((int32_t *) dst->op_params)[1];
+    float max_bias;
+    memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
+
+    GGML_ASSERT(ne01 + n_past == ne00);
+    GGML_ASSERT(n_head == ne02);
+
+    const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
+
+    const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
+    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
+
+    // compute
+    alibi_f32_cuda(src0_ddf_i, dst_ddf_i, ne00, i01_diff, ne01, n_heads_log2_floor, m0, m1, cudaStream_main);
+
+    (void) src1;
+    (void) src0_ddq_i;
+    (void) src1_ddf_i;
+    (void) i1;
+}
+
 inline void ggml_cuda_op_diag_mask_inf(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
     float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1,
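Note: m0 and m1 above are the standard ALiBi slope bases. As a rough worked example (n_head = 8 and max_bias = 8.0 are assumed values, not taken from this diff): n_heads_log2_floor = 8 and m0 = 2^(-8/8) = 0.5, so the per-head slopes come out as 0.5, 0.25, ..., 1/256; m1 only matters when n_head is not a power of two. A small sketch that prints them:

    #include <math.h>
    #include <stdio.h>

    // illustrative only: reproduce the slope computation from ggml_cuda_op_alibi
    int main(void) {
        const int   n_head   = 8;    // assumed example value
        const float max_bias = 8.0f; // assumed example value

        const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
        const float m0 = powf(2.0f, -(max_bias)        / n_heads_log2_floor);
        const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);

        for (int k = 0; k < n_head; k++) {
            const float m_k = k < n_heads_log2_floor
                ? powf(m0, k + 1)
                : powf(m1, 2*(k - n_heads_log2_floor) + 1);
            printf("head %d: slope %g\n", k, m_k); // 0.5, 0.25, ..., 0.00390625
        }
        return 0;
    }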
@@ -6121,6 +6189,11 @@ void ggml_cuda_rope(const ggml_tensor * src0, const ggml_tensor * src1, ggml_ten
     ggml_cuda_op(src0, src1, dst, ggml_cuda_op_rope, true, !is_glm); // flatten support not implemented for glm
 }
 
+void ggml_cuda_alibi(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_ASSERT(src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32);
+    ggml_cuda_op(src0, src1, dst, ggml_cuda_op_alibi, true, true);
+}
+
 void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     (void) src0;
     (void) src1;
@@ -6456,6 +6529,12 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
             }
             func = ggml_cuda_rope;
             break;
+        case GGML_OP_ALIBI:
+            if (!any_on_device) {
+                return false;
+            }
+            func = ggml_cuda_alibi;
+            break;
         default:
             return false;
     }
ggml.h
@@ -211,6 +211,7 @@
 #define GGML_MAX_OP_PARAMS 32
 #define GGML_DEFAULT_N_THREADS 4
 
+
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1
 
@@ -345,10 +346,12 @@ extern "C" {
         GGML_OP_ARGMAX,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
+        GGML_OP_CONCAT,
         GGML_OP_SILU_BACK,
         GGML_OP_NORM, // normalize
         GGML_OP_RMS_NORM,
         GGML_OP_RMS_NORM_BACK,
+        GGML_OP_GROUP_NORM,
 
         GGML_OP_MUL_MAT,
         GGML_OP_OUT_PROD,
@@ -374,14 +377,19 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_1D,
         GGML_OP_CONV_2D,
+        GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
 
+        GGML_OP_UPSCALE, // nearest interpolate
+
         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
         GGML_OP_FLASH_ATTN_BACK,
         GGML_OP_WIN_PART,
         GGML_OP_WIN_UNPART,
+        GGML_OP_GET_REL_POS,
+        GGML_OP_ADD_REL_POS,
 
         GGML_OP_UNARY,
 
@@ -805,6 +813,13 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    // concat a and b on dim 2
+    // used in stable-diffusion
+    GGML_API struct ggml_tensor * ggml_concat(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_abs(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
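Note: a minimal usage sketch for the new ggml_concat (shapes are illustrative; per the comment it concatenates along dim 2, so the other dims of a and b must match):

    #include "ggml.h"

    // illustrative only: concatenate two F32 feature maps along dim 2 (channels)
    static struct ggml_tensor * concat_example(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 64*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 64, 320);
        struct ggml_tensor * b = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 64, 320);

        return ggml_concat(ctx, a, b); // result shape: 64 x 64 x 640
    }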
@@ -913,6 +928,19 @@ extern "C" {
             struct ggml_tensor  * a,
             float                 eps);
 
+    // group normalize along ne0*ne1*n_groups
+    // used in stable-diffusion
+    // TODO: eps is hardcoded to 1e-6 for now
+    GGML_API struct ggml_tensor * ggml_group_norm(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_groups);
+
+    GGML_API struct ggml_tensor * ggml_group_norm_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_groups);
+
     // a - x
     // b - dy
     // TODO: update with configurable eps
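Note: ggml_group_norm can be sketched the same way, continuing the hypothetical context from the ggml_concat example above (the 32-group choice is the usual stable-diffusion setting, not something this header mandates):

    // illustrative only: group-normalize a 64 x 64 x 320 activation with 32 groups
    struct ggml_tensor * x  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 64, 320);
    struct ggml_tensor * gn = ggml_group_norm(ctx, x, 32); // eps fixed at 1e-6, per the TODO above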
@@ -1213,6 +1241,15 @@ extern "C" {
             float                 freq_base,
             float                 freq_scale);
 
+    // xPos RoPE, in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_past,
+            int                   n_dims,
+            float                 base,
+            bool                  down);
+
     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
     GGML_API struct ggml_tensor * ggml_rope_back(
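Note: a hypothetical call to the new xPos variant (ctx, x, n_past and n_dims are placeholders, and the base and down values are arbitrary examples, not defaults defined by this header):

    // illustrative only: apply xPos RoPE in place to tensor x
    x = ggml_rope_xpos_inplace(ctx, x, n_past, n_dims, /*base =*/ 512.0f, /*down =*/ true);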
@@ -1221,7 +1258,11 @@ extern "C" {
             int                   n_past,
             int                   n_dims,
             int                   mode,
-            int                   n_ctx);
+            int                   n_ctx,
+            float                 freq_base,
+            float                 freq_scale,
+            float                 xpos_base,
+            bool                  xpos_down);
 
     // alibi position embedding
     // in-place, returns view(a)
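Note: existing ggml_rope_back call sites must now pass the four new arguments. In a hypothetical call site (dy and dx are placeholder tensors), passing freq_base = 10000.0f, freq_scale = 1.0f, xpos_base = 0.0f and xpos_down = false keeps the old behavior, which is exactly what the training hunk at the top of this diff does:

    // before this commit
    dx = ggml_rope_back(ctx0, dy, n_past, n_rot, rope_mode, n_ctx);

    // after this commit: default base, no frequency scaling, xPos disabled
    dx = ggml_rope_back(ctx0, dy, n_past, n_rot, rope_mode, n_ctx, 10000.0f, 1.0f, 0.0f, false);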
@@ -1248,6 +1289,15 @@ extern "C" {
             int                   p0, // padding
             int                   d0); // dilation
 
+    // conv_1d with padding = half
+    // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
+    GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            int                   s,
+            int                   d);
+
     GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1259,14 +1309,38 @@ extern "C" {
             int                   d0,
             int                   d1);
 
-    // conv_1d with padding = half
-    // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
-    GGML_API struct ggml_tensor * ggml_conv_1d_ph(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s,
-            int                   d);
+    // kernel size is a->ne[0] x a->ne[1]
+    // stride is equal to kernel size
+    // padding is zero
+    // example:
+    // a:     16   16    3  768
+    // b:   1024 1024    3    1
+    // res:   64   64  768    1
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    // kernel size is a->ne[0] x a->ne[1]
+    // stride is 1
+    // padding is half
+    // example:
+    // a:     3    3   256  256
+    // b:    64   64   256    1
+    // res:  64   64   256    1
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            int                   stride);
 
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
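Note: the shape examples in the comments follow directly from the stride/padding rules. A tiny sketch of the arithmetic (helper names are made up): with stride = kernel and no padding, ggml_conv_2d_sk_p0 turns a 1024x1024 image with 16x16 kernels into a 64x64 grid of patches, while ggml_conv_2d_s1_ph preserves the spatial size.

    // illustrative only: output spatial size of the two SAM convolution helpers
    static int out_sk_p0(int in, int k) { return in/k; } // stride = kernel, padding 0: 1024/16 = 64
    static int out_s1_ph(int in)        { return in;   } // stride 1, padding = kernel/2: size unchanged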
@@ -1293,6 +1367,13 @@ extern "C" {
             int                   p0,
             int                   p1);
 
+    // nearest interpolate
+    // used in stable-diffusion
+    GGML_API struct ggml_tensor * ggml_upscale(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   scale_factor);
+
     GGML_API struct ggml_tensor * ggml_flash_attn(
             struct ggml_context * ctx,
             struct ggml_tensor  * q,
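Note: and a one-line sketch for ggml_upscale, reusing the hypothetical ctx and x from the earlier group-norm example (the scale factor 2 is an arbitrary example):

    // illustrative only: 2x nearest-neighbour upscale of a 64 x 64 x 320 activation -> 128 x 128 x 320
    struct ggml_tensor * up = ggml_upscale(ctx, x, 2);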
@@ -1346,6 +1427,27 @@ extern "C" {
             struct ggml_tensor  * a,
             enum ggml_unary_op    op);
 
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_get_rel_pos(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   qh,
+            int                   kh);
+
+    // used in sam
+
+    GGML_API struct ggml_tensor * ggml_add_rel_pos(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * pw,
+            struct ggml_tensor  * ph);
+
+    GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * pw,
+            struct ggml_tensor  * ph);
+
     // custom operators
 
     typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1500,6 +1602,7 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * tensor);
 
+
     GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
 
     GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
@@ -1,14 +1,16 @@
 #!/bin/bash
 
 cp -rpv ../ggml/src/ggml.c ./ggml.c
+cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
 cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
 cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
 cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
 cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
 cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
 cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
 cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
 cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
+cp -rpv ../ggml/include/ggml/ggml-alloc.h ./ggml-alloc.h
 
 cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
 cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp