fix warnings

This commit is contained in:
caitianchi 2024-05-29 04:09:44 +08:00
parent 02eb445d73
commit 07f48f9669
7 changed files with 42 additions and 41 deletions

View File

@ -577,7 +577,6 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
/*.no_alloc =*/ true, /*.no_alloc =*/ true,
}; };
LOG_TEE("%s: ctx->buf_compute_meta.size(): %d \n", __func__, ctx->buf_compute_meta.size());
struct ggml_context * ctx0 = ggml_init(params); struct ggml_context * ctx0 = ggml_init(params);
struct ggml_cgraph * gf = ggml_new_graph(ctx0); struct ggml_cgraph * gf = ggml_new_graph(ctx0);
@ -1446,7 +1445,7 @@ bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length
return true; return true;
} }
static void normalize_image_u8_to_f32(struct clip_ctx * ctx, const clip_image_u8* src, clip_image_f32* dst) { void normalize_image_u8_to_f32(struct clip_ctx * ctx, const clip_image_u8* src, clip_image_f32* dst) {
dst->nx = src->nx; dst->nx = src->nx;
dst->ny = src->ny; dst->ny = src->ny;
dst->buf.resize(src->buf.size()); dst->buf.resize(src->buf.size());
@ -1511,7 +1510,7 @@ int clip_n_patches(const struct clip_ctx * ctx) {
return n_patches; return n_patches;
} }
std::vector<std::vector<std::vector<float>>> get_1d_sincos_pos_embed_from_grid_new(int embed_dim, const std::vector<std::vector<float>>& pos) { static std::vector<std::vector<std::vector<float>>> get_1d_sincos_pos_embed_from_grid_new(int embed_dim, const std::vector<std::vector<float>>& pos) {
assert(embed_dim % 2 == 0); assert(embed_dim % 2 == 0);
int H = pos.size(); int H = pos.size();
int W = pos[0].size(); int W = pos[0].size();
@ -1535,7 +1534,7 @@ std::vector<std::vector<std::vector<float>>> get_1d_sincos_pos_embed_from_grid_n
return emb; return emb;
} }
std::vector<std::vector<std::vector<float>>> get_2d_sincos_pos_embed_from_grid(int embed_dim, const std::vector<std::vector<std::vector<float>>>& grid) { static std::vector<std::vector<std::vector<float>>> get_2d_sincos_pos_embed_from_grid(int embed_dim, const std::vector<std::vector<std::vector<float>>>& grid) {
assert(embed_dim % 2 == 0); assert(embed_dim % 2 == 0);
std::vector<std::vector<std::vector<float>>> emb_h = get_1d_sincos_pos_embed_from_grid_new(embed_dim / 2, grid[0]); // (H, W, D/2) std::vector<std::vector<std::vector<float>>> emb_h = get_1d_sincos_pos_embed_from_grid_new(embed_dim / 2, grid[0]); // (H, W, D/2)
std::vector<std::vector<std::vector<float>>> emb_w = get_1d_sincos_pos_embed_from_grid_new(embed_dim / 2, grid[1]); // (H, W, D/2) std::vector<std::vector<std::vector<float>>> emb_w = get_1d_sincos_pos_embed_from_grid_new(embed_dim / 2, grid[1]); // (H, W, D/2)
@ -1555,7 +1554,7 @@ std::vector<std::vector<std::vector<float>>> get_2d_sincos_pos_embed_from_grid(i
return emb; return emb;
} }
std::vector<std::vector<float>> get_2d_sincos_pos_embed(int embed_dim, const std::pair<int, int> image_size) { static std::vector<std::vector<float>> get_2d_sincos_pos_embed(int embed_dim, const std::pair<int, int> image_size) {
int grid_h_size = image_size.first; int grid_h_size = image_size.first;
int grid_w_size = image_size.second; int grid_w_size = image_size.second;

View File

@ -69,7 +69,7 @@ CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8
/** interpret bytes as an image file with length bytes_length, and use the result to populate img */ /** interpret bytes as an image file with length bytes_length, and use the result to populate img */
CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img); CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
static void normalize_image_u8_to_f32(struct clip_ctx * ctx, const clip_image_u8* src, clip_image_f32* dst); CLIP_API void normalize_image_u8_to_f32(struct clip_ctx * ctx, const clip_image_u8* src, clip_image_f32* dst);
CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx); CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);

View File

@ -21,8 +21,9 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v
LOG_TEE("%s", text); LOG_TEE("%s", text);
} }
struct minicpmv_context * minicpmv_init(gpt_params * params, const std::string & fname, int &n_past){ static struct minicpmv_context * minicpmv_init(gpt_params * params, const std::string & fname, int &n_past){
auto image_embed_slices = minicpmv_image_embed(params, fname); auto embeds = minicpmv_image_embed(params, fname);
auto image_embed_slices = embeds->image_embeds;
if (!image_embed_slices[0][0]) { if (!image_embed_slices[0][0]) {
std::cerr << "error: failed to load image " << fname << ". Terminating\n\n"; std::cerr << "error: failed to load image " << fname << ". Terminating\n\n";
return NULL; return NULL;
@ -52,14 +53,13 @@ struct minicpmv_context * minicpmv_init(gpt_params * params, const std::string &
float t_process_image_ms = (t_process_image_end_us - t_process_image_start_us) / 1000.0; float t_process_image_ms = (t_process_image_end_us - t_process_image_start_us) / 1000.0;
LOG_TEE("\n%s: llama process image in %8.2f ms.\n", __func__, t_process_image_ms); LOG_TEE("\n%s: llama process image in %8.2f ms.\n", __func__, t_process_image_ms);
llava_image_embed_free_slice(image_embed_slices); llava_image_embed_free_uhd(embeds);
return ctx_llava; return ctx_llava;
} }
struct llama_sampling_context * llama_init(struct minicpmv_context * ctx_llava, gpt_params * params, std::string prompt, int &n_past, bool is_first = false){ static struct llama_sampling_context * llama_init(struct minicpmv_context * ctx_llava, gpt_params * params, std::string prompt, int &n_past, bool is_first = false){
std::string user_prompt = prompt; std::string user_prompt = prompt;
if (!is_first) user_prompt = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" + prompt; if (!is_first) user_prompt = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" + prompt;
const int max_tgt_len = params->n_predict < 0 ? 256 : params->n_predict;
eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false); eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);
eval_string(ctx_llava->ctx_llama, "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", params->n_batch, &n_past, false); eval_string(ctx_llava->ctx_llama, "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", params->n_batch, &n_past, false);
@ -71,7 +71,7 @@ struct llama_sampling_context * llama_init(struct minicpmv_context * ctx_llava,
return ctx_sampling; return ctx_sampling;
} }
const char * llama_loop(struct minicpmv_context * ctx_llava,struct llama_sampling_context * ctx_sampling, int &n_past){ static const char * llama_loop(struct minicpmv_context * ctx_llava,struct llama_sampling_context * ctx_sampling, int &n_past){
const char * tmp = sample(ctx_sampling, ctx_llava->ctx_llama, &n_past); const char * tmp = sample(ctx_sampling, ctx_llava->ctx_llama, &n_past);
return tmp; return tmp;

View File

@ -108,11 +108,11 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_
return true; return true;
} }
int ensure_divide(int length, int patch_size) { static int ensure_divide(int length, int patch_size) {
return std::max(static_cast<int>(std::round(static_cast<float>(length) / patch_size) * patch_size), patch_size); return std::max(static_cast<int>(std::round(static_cast<float>(length) / patch_size) * patch_size), patch_size);
} }
std::pair<int, int> uhd_find_best_resize(std::pair<int, int> original_size, int scale_resolution, int patch_size, bool allow_upscale = false) { static std::pair<int, int> uhd_find_best_resize(std::pair<int, int> original_size, int scale_resolution, int patch_size, bool allow_upscale = false) {
int width = original_size.first; int width = original_size.first;
int height = original_size.second; int height = original_size.second;
if ((width * height > scale_resolution * scale_resolution) || allow_upscale) { if ((width * height > scale_resolution * scale_resolution) || allow_upscale) {
@ -129,7 +129,7 @@ inline float clip(float x, float lower, float upper) {
return std::max(lower, std::min(x, upper)); return std::max(lower, std::min(x, upper));
} }
std::pair<int, int> uhd_get_refine_size(std::pair<int, int> original_size, std::pair<int, int> grid, int scale_resolution, int patch_size, bool allow_upscale = false) { static std::pair<int, int> uhd_get_refine_size(std::pair<int, int> original_size, std::pair<int, int> grid, int scale_resolution, int patch_size, bool allow_upscale = false) {
int width, height; int width, height;
std::tie(width, height) = original_size; std::tie(width, height) = original_size;
int grid_x, grid_y; int grid_x, grid_y;
@ -218,7 +218,7 @@ static bool bicubic_resize(const clip_image_u8 &img, clip_image_u8 &dst, int tar
// -> https://arxiv.org/pdf/2403.11703 // -> https://arxiv.org/pdf/2403.11703
// -> https://github.com/thunlp/LLaVA-UHD // -> https://github.com/thunlp/LLaVA-UHD
// -> https://github.com/thunlp/LLaVA-UHD/blob/302301bc2175f7e717fb8548516188e89f649753/llava_uhd/train/llava-uhd/slice_logic.py#L118 // -> https://github.com/thunlp/LLaVA-UHD/blob/302301bc2175f7e717fb8548516188e89f649753/llava_uhd/train/llava-uhd/slice_logic.py#L118
std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_image_u8 * img, const int max_slice_nums=9, const int scale_resolution=448, const int patch_size=14, const bool never_split=false) { static std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_image_u8 * img, const int max_slice_nums=9, const int scale_resolution=448, const int patch_size=14) {
const std::pair<int, int> original_size={img->nx,img->ny}; const std::pair<int, int> original_size={img->nx,img->ny};
const int original_width = img->nx; const int original_width = img->nx;
const int original_height = img->ny; const int original_height = img->ny;
@ -311,30 +311,30 @@ std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_image_u8 *
return images; return images;
} }
std::vector<std::vector<struct llava_image_embed *>> llava_image_embed_make_with_bytes_uhd(struct clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img) { struct uhd_image_embed * llava_image_embed_make_with_bytes_uhd(struct clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img) {
std::vector<std::vector<clip_image_u8 *>> imgs = uhd_slice_image(img); std::vector<std::vector<clip_image_u8 *>> imgs = uhd_slice_image(img);
for (size_t i = 0; i < imgs.size(); ++i){ for (size_t i = 0; i < imgs.size(); ++i){
for (size_t j = 0; j < imgs[i].size(); ++j) { for (size_t j = 0; j < imgs[i].size(); ++j) {
LOG_TEE("%s: %d %d\n", __func__,imgs[i][j]->nx,imgs[i][j]->ny); LOG_TEE("%s: %d %d\n", __func__,imgs[i][j]->nx,imgs[i][j]->ny);
} }
} }
std::vector<std::vector<llava_image_embed *>> results; struct uhd_image_embed * results = new uhd_image_embed();
for (size_t i = 0; i < imgs.size(); ++i){ for (size_t i = 0; i < imgs.size(); ++i){
results.push_back(std::vector<llava_image_embed *>()); results->image_embeds.push_back(std::vector<llava_image_embed *>());
for (size_t j = 0; j < imgs[i].size(); ++j) { for (size_t j = 0; j < imgs[i].size(); ++j) {
float* image_embed = NULL; float* image_embed = NULL;
int n_image_pos = 0; int n_image_pos = 0;
bool image_embed_result = llava_image_embed_make_with_clip_img(ctx_clip, n_threads, imgs[i][j], &image_embed, &n_image_pos); bool image_embed_result = llava_image_embed_make_with_clip_img(ctx_clip, n_threads, imgs[i][j], &image_embed, &n_image_pos);
if (!image_embed_result) { if (!image_embed_result) {
LOG_TEE("%s: coulnd't embed the image\n", __func__); LOG_TEE("%s: coulnd't embed the image\n", __func__);
return std::vector<std::vector<struct llava_image_embed *>>(); return NULL;
} }
auto result = (llava_image_embed*)malloc(sizeof(llava_image_embed)); auto result = (llava_image_embed*)malloc(sizeof(llava_image_embed));
result->embed = image_embed; result->embed = image_embed;
result->n_image_pos = n_image_pos; result->n_image_pos = n_image_pos;
results[i].push_back(result); results->image_embeds[i].push_back(result);
} }
} }
return results; return results;
@ -374,7 +374,8 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long
} }
bool llava_image_embed_make_with_clip_img_ollama(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) { bool llava_image_embed_make_with_clip_img_ollama(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
auto image_embed_slices = llava_image_embed_make_with_bytes_uhd(ctx_clip, n_threads, img); auto embeds = llava_image_embed_make_with_bytes_uhd(ctx_clip, n_threads, img);
auto image_embed_slices = embeds->image_embeds;
if (!image_embed_slices[0][0]){ if (!image_embed_slices[0][0]){
LOG_TEE("%s: failed to embeding image\n", __func__); LOG_TEE("%s: failed to embeding image\n", __func__);
return false; return false;
@ -416,35 +417,35 @@ bool llava_image_embed_make_with_clip_img_ollama(clip_ctx * ctx_clip, int n_thre
return true; return true;
} }
std::vector<std::vector<struct llava_image_embed *>> llava_image_embed_make_with_filename_uhd(struct clip_ctx * ctx_clip, int n_threads, const char * image_path) { struct uhd_image_embed * llava_image_embed_make_with_filename_uhd(struct clip_ctx * ctx_clip, int n_threads, const char * image_path) {
unsigned char* image_bytes; unsigned char* image_bytes;
long image_bytes_length; long image_bytes_length;
auto loaded = load_file_to_bytes(image_path, &image_bytes, &image_bytes_length); auto loaded = load_file_to_bytes(image_path, &image_bytes, &image_bytes_length);
if (!loaded) { if (!loaded) {
LOG_TEE("%s: failed to load %s\n", __func__, image_path); LOG_TEE("%s: failed to load %s\n", __func__, image_path);
return std::vector<std::vector<struct llava_image_embed *>>(); return NULL;
} }
clip_image_u8 * img = clip_image_u8_init(); clip_image_u8 * img = clip_image_u8_init();
if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img)) { if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img)) {
clip_image_u8_free(img); clip_image_u8_free(img);
LOG_TEE("%s: can't load image from bytes, is it a valid image?", __func__); LOG_TEE("%s: can't load image from bytes, is it a valid image?", __func__);
return std::vector<std::vector<struct llava_image_embed *>>(); return NULL;
} }
std::vector<std::vector<struct llava_image_embed *>> embeds = llava_image_embed_make_with_bytes_uhd(ctx_clip, n_threads, img); struct uhd_image_embed * embeds = llava_image_embed_make_with_bytes_uhd(ctx_clip, n_threads, img);
clip_image_u8_free(img); clip_image_u8_free(img);
free(image_bytes); free(image_bytes);
return embeds; return embeds;
} }
void llava_image_embed_free_uhd(std::vector<std::vector<struct llava_image_embed *>> embed) { void llava_image_embed_free_uhd(struct uhd_image_embed * embed) {
for (size_t i = 0; i < embed.size(); ++i){ for (size_t i = 0; i < embed->image_embeds.size(); ++i){
for (size_t j = 0; j < embed[i].size(); ++j){ for (size_t j = 0; j < embed->image_embeds[i].size(); ++j){
free(embed[i][j]->embed); free(embed->image_embeds[i][j]->embed);
free(embed[i][j]); free(embed->image_embeds[i][j]->embed);
} }
embed[i] = std::vector<struct llava_image_embed *>(); embed->image_embeds[i] = std::vector<struct llava_image_embed *>();
} }
embed = std::vector<std::vector<struct llava_image_embed *>>(); embed->image_embeds = std::vector<std::vector<struct llava_image_embed *>>();
} }

View File

@ -18,6 +18,9 @@
#endif #endif
struct clip_ctx; struct clip_ctx;
struct uhd_image_embed {
std::vector<std::vector<struct llava_image_embed *>> image_embeds;
};
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -34,11 +37,11 @@ MINICPMV_API bool llava_validate_embed_size(const struct llama_context * ctx_lla
MINICPMV_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out); MINICPMV_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
/** build an image embed from image file bytes */ /** build an image embed from image file bytes */
MINICPMV_API std::vector<std::vector<struct llava_image_embed *>> llava_image_embed_make_with_bytes_uhd(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length); MINICPMV_API struct uhd_image_embed * llava_image_embed_make_with_bytes_uhd(struct clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img);
/** build an image embed from a path to an image filename */ /** build an image embed from a path to an image filename */
MINICPMV_API bool llava_image_embed_make_with_clip_img_ollama(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out); MINICPMV_API bool llava_image_embed_make_with_clip_img_ollama(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
MINICPMV_API std::vector<std::vector<struct llava_image_embed *>> llava_image_embed_make_with_filename_uhd(struct clip_ctx * ctx_clip, int n_threads, const char * image_path); MINICPMV_API struct uhd_image_embed * llava_image_embed_make_with_filename_uhd(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);
MINICPMV_API void llava_image_embed_free_uhd(std::vector<std::vector<struct llava_image_embed *>> embed); MINICPMV_API void llava_image_embed_free_uhd(struct uhd_image_embed * embed);
/** free an embedding made with llava_image_embed_make_* */ /** free an embedding made with llava_image_embed_make_* */
/** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */ /** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */

View File

@ -23,8 +23,6 @@ struct llama_model * llava_init(gpt_params * params) {
} }
struct minicpmv_context * llava_init_context(gpt_params * params, llama_model * model) { struct minicpmv_context * llava_init_context(gpt_params * params, llama_model * model) {
const char * clip_path = params->mmproj.c_str();
auto prompt = params->prompt; auto prompt = params->prompt;
if (prompt.empty()) { if (prompt.empty()) {
prompt = "describe the image in detail."; prompt = "describe the image in detail.";
@ -65,9 +63,9 @@ struct clip_ctx * clip_init_context(gpt_params * params) {
return ctx_clip; return ctx_clip;
} }
std::vector<std::vector<struct llava_image_embed *>> minicpmv_image_embed(gpt_params * params, const std::string & fname){ struct uhd_image_embed * minicpmv_image_embed(gpt_params * params, const std::string & fname){
auto ctx_clip = clip_init_context(params); auto ctx_clip = clip_init_context(params);
auto image_embed_and_slices = llava_image_embed_make_with_filename_slice(ctx_clip, params->n_threads, fname.c_str()); auto image_embed_and_slices = llava_image_embed_make_with_filename_uhd(ctx_clip, params->n_threads, fname.c_str());
if (ctx_clip) { if (ctx_clip) {
clip_free(ctx_clip); clip_free(ctx_clip);
ctx_clip = NULL; ctx_clip = NULL;

View File

@ -34,7 +34,7 @@ MINICPMV_API struct minicpmv_context * llava_init_context(gpt_params * params, l
MINICPMV_API void llava_free(struct minicpmv_context * ctx_llava); MINICPMV_API void llava_free(struct minicpmv_context * ctx_llava);
MINICPMV_API struct clip_ctx * clip_init_context(gpt_params * params); MINICPMV_API struct clip_ctx * clip_init_context(gpt_params * params);
MINICPMV_API std::vector<std::vector<struct llava_image_embed *>> minicpmv_image_embed(gpt_params * params, const std::string & fname); MINICPMV_API struct uhd_image_embed * minicpmv_image_embed(gpt_params * params, const std::string & fname);
MINICPMV_API bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens, int n_batch, int * n_past); MINICPMV_API bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_token> tokens, int n_batch, int * n_past);
MINICPMV_API bool eval_id(struct llama_context * ctx_llama, int id, int * n_past); MINICPMV_API bool eval_id(struct llama_context * ctx_llama, int id, int * n_past);