mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 21:10:24 +01:00
llava : change API to pure C style for Rust FFI bindgen (#6079)
Co-authored-by: Lou Ting <louting.t@alibaba-inc.com>
This commit is contained in:
parent
3020327f6c
commit
4e9a7f7f7f
@ -1235,16 +1235,16 @@ struct clip_image_f32 * clip_image_f32_init() {
|
|||||||
|
|
||||||
void clip_image_u8_free(struct clip_image_u8 * img) { delete img; }
|
void clip_image_u8_free(struct clip_image_u8 * img) { delete img; }
|
||||||
void clip_image_f32_free(struct clip_image_f32 * img) { delete img; }
|
void clip_image_f32_free(struct clip_image_f32 * img) { delete img; }
|
||||||
void clip_image_u8_batch_free(struct clip_image_u8_batch & batch) {
|
void clip_image_u8_batch_free(struct clip_image_u8_batch * batch) {
|
||||||
if (batch.size > 0) {
|
if (batch->size > 0) {
|
||||||
delete[] batch.data;
|
delete[] batch->data;
|
||||||
batch.size = 0;
|
batch->size = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void clip_image_f32_batch_free(struct clip_image_f32_batch & batch) {
|
void clip_image_f32_batch_free(struct clip_image_f32_batch * batch) {
|
||||||
if (batch.size > 0) {
|
if (batch->size > 0) {
|
||||||
delete[] batch.data;
|
delete[] batch->data;
|
||||||
batch.size = 0;
|
batch->size = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1497,7 +1497,7 @@ static std::vector<clip_image_u8*> divide_to_patches_u8(const clip_image_u8 & im
|
|||||||
|
|
||||||
// returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
|
// returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
|
||||||
// res_imgs memory is being allocated here, previous allocations will be freed if found
|
// res_imgs memory is being allocated here, previous allocations will be freed if found
|
||||||
bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs) {
|
bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
|
||||||
bool pad_to_square = true;
|
bool pad_to_square = true;
|
||||||
if (!ctx->has_vision_encoder) {
|
if (!ctx->has_vision_encoder) {
|
||||||
printf("This gguf file seems to have no vision encoder\n");
|
printf("This gguf file seems to have no vision encoder\n");
|
||||||
@ -1509,11 +1509,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
|
|||||||
pad_to_square = false;
|
pad_to_square = false;
|
||||||
}
|
}
|
||||||
// free the previous res_imgs if any set
|
// free the previous res_imgs if any set
|
||||||
if (res_imgs.size > 0) {
|
if (res_imgs->size > 0) {
|
||||||
clip_image_f32_batch_free(res_imgs);
|
clip_image_f32_batch_free(res_imgs);
|
||||||
}
|
}
|
||||||
res_imgs.data = nullptr;
|
res_imgs->data = nullptr;
|
||||||
res_imgs.size = 0;
|
res_imgs->size = 0;
|
||||||
|
|
||||||
// the logic below is to pad the shorter side to the longer side with a background color: rgb(122, 116, 104)
|
// the logic below is to pad the shorter side to the longer side with a background color: rgb(122, 116, 104)
|
||||||
// see https://github.com/haotian-liu/LLaVA/blob/e854a2bf85118c504f6f16bf5c3c7c92f8fa8c6b/llava/conversation.py#L113-L156
|
// see https://github.com/haotian-liu/LLaVA/blob/e854a2bf85118c504f6f16bf5c3c7c92f8fa8c6b/llava/conversation.py#L113-L156
|
||||||
@ -1568,11 +1568,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
|
|||||||
bicubic_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square
|
bicubic_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square
|
||||||
patches.insert(patches.begin(), image_original_resize);
|
patches.insert(patches.begin(), image_original_resize);
|
||||||
// clip_image_f32_batch_init(patches.size());
|
// clip_image_f32_batch_init(patches.size());
|
||||||
res_imgs.size = patches.size();
|
res_imgs->size = patches.size();
|
||||||
res_imgs.data = new clip_image_f32[res_imgs.size];
|
res_imgs->data = new clip_image_f32[res_imgs->size];
|
||||||
int num=0;
|
int num=0;
|
||||||
for (auto& patch : patches) {
|
for (auto& patch : patches) {
|
||||||
normalize_image_u8_to_f32(patch, &res_imgs.data[num], ctx->image_mean, ctx->image_std);
|
normalize_image_u8_to_f32(patch, &res_imgs->data[num], ctx->image_mean, ctx->image_std);
|
||||||
num++;
|
num++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1660,9 +1660,9 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
|
|||||||
// }
|
// }
|
||||||
// res_imgs.push_back(res);
|
// res_imgs.push_back(res);
|
||||||
|
|
||||||
res_imgs.size = 1;
|
res_imgs->size = 1;
|
||||||
res_imgs.data = new clip_image_f32[res_imgs.size];
|
res_imgs->data = new clip_image_f32[res_imgs->size];
|
||||||
res_imgs.data[0] = *res;
|
res_imgs->data[0] = *res;
|
||||||
clip_image_f32_free(res);
|
clip_image_f32_free(res);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -60,8 +60,8 @@ CLIP_API struct clip_image_f32 * clip_image_f32_init();
|
|||||||
|
|
||||||
CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
|
CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
|
||||||
CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
|
CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
|
||||||
CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch & batch);
|
CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch);
|
||||||
CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch & batch);
|
CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
|
||||||
|
|
||||||
CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
|
CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
|
||||||
|
|
||||||
@ -69,7 +69,7 @@ CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8
|
|||||||
CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
|
CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
|
||||||
|
|
||||||
/** preprocess img and store the result in res_imgs, pad_to_square may be overriden to false depending on model configuration */
|
/** preprocess img and store the result in res_imgs, pad_to_square may be overriden to false depending on model configuration */
|
||||||
CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs );
|
CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );
|
||||||
|
|
||||||
CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);
|
CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);
|
||||||
|
|
||||||
|
@ -223,7 +223,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
|
|||||||
clip_image_f32_batch img_res_v;
|
clip_image_f32_batch img_res_v;
|
||||||
img_res_v.size = 0;
|
img_res_v.size = 0;
|
||||||
img_res_v.data = nullptr;
|
img_res_v.data = nullptr;
|
||||||
if (!clip_image_preprocess(ctx_clip, img, img_res_v)) {
|
if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) {
|
||||||
fprintf(stderr, "%s: unable to preprocess image\n", __func__);
|
fprintf(stderr, "%s: unable to preprocess image\n", __func__);
|
||||||
delete[] img_res_v.data;
|
delete[] img_res_v.data;
|
||||||
return false;
|
return false;
|
||||||
|
@ -29,9 +29,9 @@ struct llava_image_embed {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/** sanity check for clip <-> llava embed size match */
|
/** sanity check for clip <-> llava embed size match */
|
||||||
LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
|
LLAVA_API bool llava_validate_embed_size(const struct llama_context * ctx_llama, const struct clip_ctx * ctx_clip);
|
||||||
|
|
||||||
LLAVA_API bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
|
LLAVA_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
|
||||||
|
|
||||||
/** build an image embed from image file bytes */
|
/** build an image embed from image file bytes */
|
||||||
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
|
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user