From 6e9ebda9eacd1c9519439b26eabf0a595b3059c8 Mon Sep 17 00:00:00 2001
From: caitianchi
Date: Mon, 13 Jan 2025 22:00:39 +0800
Subject: [PATCH 1/9] init

---
 Makefile                                      |  6 +--
 convert_hf_to_gguf.py                         |  1 +
 examples/llava/README-minicpmo2.6.md          | 44 +++++++++++++++++++
 examples/llava/clip.cpp                       | 17 +++++++
 examples/llava/llava.cpp                      | 10 +----
 examples/llava/minicpmv-cli.cpp               | 10 ++++-
 .../minicpmv-convert-image-encoder-to-gguf.py | 19 +++++---
 examples/llava/minicpmv-surgery.py            |  2 +-
 8 files changed, 91 insertions(+), 18 deletions(-)
 create mode 100644 examples/llava/README-minicpmo2.6.md

diff --git a/Makefile b/Makefile
index 19ae0d5f1c87b..18d3815b88b94 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
-ifndef LLAMA_MAKEFILE
-$(error The Makefile build is deprecated. Use the CMake build instead. For more details, see https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
-endif
+# ifndef LLAMA_MAKEFILE
+# $(error The Makefile build is deprecated. Use the CMake build instead. For more details, see https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
+# endif
 
 # Define the default target now so that it is always the first target
 BUILD_TARGETS = \

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 4dc9837ab2d04..63151346945f8 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -579,6 +579,7 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
+        res = "qwen2"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"

diff --git a/examples/llava/README-minicpmo2.6.md b/examples/llava/README-minicpmo2.6.md
new file mode 100644
index 0000000000000..fc65f622551e6
--- /dev/null
+++ b/examples/llava/README-minicpmo2.6.md
@@ -0,0 +1,44 @@
+## MiniCPM-o 2.6
+
+### Prepare models and code
+
+Download the [MiniCPM-o-2_6](https://huggingface.co/openbmb/MiniCPM-o-2_6) PyTorch model from Hugging Face into the "MiniCPM-o-2_6" folder.
+
+Clone llama.cpp:
+```bash
+git clone git@github.com:OpenBMB/llama.cpp.git
+cd llama.cpp
+git checkout minicpm-omni
+```
+
+### Usage of MiniCPM-o 2.6
+
+Convert the PyTorch model to gguf files (you can also download our converted [gguf](https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf) files)
+
+```bash
+python ./examples/llava/minicpmv-surgery.py -m ../MiniCPM-o-2_6
+python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-o-2_6 --minicpmv-projector ../MiniCPM-o-2_6/minicpmo.projector --output-dir ../MiniCPM-o-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
+python ./convert_hf_to_gguf.py ../MiniCPM-o-2_6/model
+
+# quantize int4 version
+./llama-quantize ../MiniCPM-o-2_6/model/ggml-model-f16.gguf ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M
+```
+
+Build for Linux or Mac
+
+```bash
+make
+make llama-minicpmv-cli
+```
+
+Inference on Linux or Mac
+```
+# run f16 version
+./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
+
+# run quantized int4 version
+./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
+
+# or run in interactive mode
+./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i
+```

diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 7a8a3156bfdef..076d07cee8259 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -718,6 +718,9 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
         else if (ctx->minicpmv_version == 3) {
             pos_embed = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, 3584, pos_w * pos_h, 1);
         }
+        else if (ctx->minicpmv_version == 4) {
+            pos_embed = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, 3584, pos_w * pos_h, 1);
+        }
         ggml_set_name(pos_embed, "pos_embed");
         ggml_set_input(pos_embed);
     }
@@ -1053,6 +1056,11 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             n_head = hidden_size/d_head;
             num_query = 64;
         }
+        else if (ctx->minicpmv_version == 4) {
+            hidden_size = 3584;
+            n_head = hidden_size/d_head;
+            num_query = 64;
+        }
 
         struct ggml_tensor * Q = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_q_w, q), model.mm_model_attn_q_b);
         Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head));
@@ -2335,6 +2343,9 @@ int clip_n_patches_by_img(const struct clip_ctx * ctx, struct clip_image_f32 * i
         else if (ctx->minicpmv_version == 3) {
             n_patches = 64;
         }
+        else if (ctx->minicpmv_version == 4) {
+            n_patches = 64;
+        }
     } else if (ctx->proj_type == PROJECTOR_TYPE_MERGER) {
         int patch_size = params.patch_size * 2;
         int x_patch = img->nx / patch_size + (int)(img->nx % patch_size > 0);
@@ -2543,6 +2554,9 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
             else if (ctx->minicpmv_version == 3) {
                 embed_dim = 3584;
             }
+            else if (ctx->minicpmv_version == 4) {
+                embed_dim = 3584;
+            }
             auto pos_embed_t = get_2d_sincos_pos_embed(embed_dim, std::make_pair(pos_w, pos_h));
 
             float * pos_embed_data = (float *)malloc(ggml_nbytes(pos_embed));
@@ -2786,6 +2800,9 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
         else if (ctx->minicpmv_version == 3) {
             return 3584;
         }
+        else if (ctx->minicpmv_version == 4) {
+            return 3584;
+        }
     }
     if (ctx->proj_type == PROJECTOR_TYPE_MERGER) {
         return ctx->vision_model.mm_1_b->ne[0];

diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index c598caf3dd1eb..f8f16ab585032 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -216,7 +216,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
 
     return true;
 }
 
-static clip_image_f32 * only_v2_5_reshape_by_patch(clip_image_f32 * image, int patch_size) {
+static clip_image_f32 * reshape_by_patch(clip_image_f32 * image, int patch_size) {
     int width = image->nx;
     int height = image->ny;
     int num_patches = (height / patch_size) * (width / patch_size);
@@ -277,13 +277,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
             encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
         }
         else {
-            int has_minicpmv_projector = clip_is_minicpmv(ctx_clip);
-            if (has_minicpmv_projector == 2) {
-                encoded = clip_image_encode(ctx_clip, n_threads, only_v2_5_reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]);
-            }
-            else if (has_minicpmv_projector == 3) {
-                encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
-            }
+            encoded = clip_image_encode(ctx_clip, n_threads, reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]);
         }
 
         if (!encoded) {

diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index 38c44e130e15c..53d902d616e85 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -140,6 +140,9 @@ static void process_image(struct llava_context * ctx_llava, struct llava_image_e
     else if (has_minicpmv_projector == 3) {
         system_prompt = "<|im_start|>user\n";
     }
+    else if (has_minicpmv_projector == 4) {
+        system_prompt = "<|im_start|>user\n";
+    }
     LOG_INF("%s: image token past: %d\n", __func__, n_past);
     eval_string(ctx_llava->ctx_llama, (system_prompt+"<image>").c_str(), params->n_batch, &n_past, false);
     process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
@@ -227,6 +230,9 @@ static struct common_sampler * llama_init(struct llava_context * ctx_llava, comm
         else if (has_minicpmv_projector == 3) {
             user_prompt = "<|im_start|>user\n" + prompt;
         }
+        else if (has_minicpmv_projector == 4) {
+            user_prompt = "<|im_start|>user\n" + prompt;
+        }
     }
 
     eval_string(ctx_llava->ctx_llama, user_prompt.c_str(), params->n_batch, &n_past, false);
@@ -236,6 +242,9 @@ static struct common_sampler * llama_init(struct llava_context * ctx_llava, comm
     else if (has_minicpmv_projector == 3) {
         eval_string(ctx_llava->ctx_llama, "<|im_end|><|im_start|>assistant\n", params->n_batch, &n_past, false);
     }
+    else if (has_minicpmv_projector == 4) {
+        eval_string(ctx_llava->ctx_llama, "<|im_end|><|im_start|>assistant\n", params->n_batch, &n_past, false);
+    }
 
     // generate the response
 
@@ -308,7 +317,6 @@ int main(int argc, char ** argv) {
             const auto * tmp = llama_loop(ctx_llava, smpl, n_past);
             response += tmp;
             if (strcmp(tmp, "</s>") == 0) break;
-            if (strstr(tmp, "###")) break; // Yi-VL behavior
             printf("%s", tmp);// mistral llava-1.6
             if (strstr(response.c_str(), "<user>")) break; // minicpm-v
             fflush(stdout);

diff --git a/examples/llava/minicpmv-convert-image-encoder-to-gguf.py b/examples/llava/minicpmv-convert-image-encoder-to-gguf.py
index ea773742a832b..ade04fd9c5c00 100644
--- a/examples/llava/minicpmv-convert-image-encoder-to-gguf.py
+++ b/examples/llava/minicpmv-convert-image-encoder-to-gguf.py
@@ -501,7 +501,7 @@ def bytes_to_unicode():
 default_image_std = [0.26862954, 0.26130258, 0.27577711]
 ap.add_argument('--image-mean', type=float, nargs='+', help='Mean of the images for normalization (overrides processor) ', default=None)
 ap.add_argument('--image-std', type=float, nargs='+', help='Standard deviation of the images for normalization (overrides processor)', default=None)
-ap.add_argument('--minicpmv_version', type=int, help='minicpmv_version: MiniCPM-V-2 use 1; MiniCPM-V-2.5 use 2; MiniCPM-V-2.6 use 3', default=2)
+ap.add_argument('--minicpmv_version', type=int, help='minicpmv_version: MiniCPM-V-2 use 1; MiniCPM-V-2.5 use 2; MiniCPM-V-2.6 use 3; MiniCPM-o-2.6 use 4', default=2)
 
 # with proper
 args = ap.parse_args()
@@ -545,13 +545,20 @@ def bytes_to_unicode():
 
 minicpmv_version = args.minicpmv_version
 emb_dim = 4096
+block_count = 26
 if minicpmv_version == 1:
     emb_dim = 2304
+    block_count = 26
 elif minicpmv_version == 2:
     emb_dim = 4096
+    block_count = 27
 elif minicpmv_version == 3:
     emb_dim = 3584
-
+    block_count = 27
+elif minicpmv_version == 4:
+    emb_dim = 3584
+    block_count = 27
+    
 default_vision_config = {
         "hidden_size": 1152,
        "image_size": 980,
@@ -567,7 +574,10 @@ def bytes_to_unicode():
 if minicpmv_version == 3:
     vision_config = SiglipVisionConfig(**default_vision_config)
     model = SiglipVisionTransformer(vision_config)
-
+elif minicpmv_version == 4:
+    vision_config = SiglipVisionConfig(**default_vision_config)
+    model = SiglipVisionTransformer(vision_config)
+    
 processor = None
 # if model.attn_pool is not None:
 #     model.attn_pool = torch.nn.Identity()
@@ -587,7 +597,7 @@ def bytes_to_unicode():
     fname_middle = "mmproj-"
     has_text_encoder = False
     has_minicpmv_projector = True
-    minicpmv_version = 3
+    minicpmv_version = 4
 elif args.vision_only:
     fname_middle = "vision-"
     has_text_encoder = False
@@ -625,7 +635,6 @@ def bytes_to_unicode():
     fout.add_uint32("clip.vision.projection_dim", 0)
     fout.add_uint32(add_key_str(KEY_ATTENTION_HEAD_COUNT, VISION), 16)
     fout.add_float32(add_key_str(KEY_ATTENTION_LAYERNORM_EPS, VISION), 1e-6)
-    block_count = 26
     fout.add_uint32(add_key_str(KEY_BLOCK_COUNT, VISION), block_count)
 
 if processor is not None:

diff --git a/examples/llava/minicpmv-surgery.py b/examples/llava/minicpmv-surgery.py
index 748ff5c57824e..ba82116582b1f 100644
--- a/examples/llava/minicpmv-surgery.py
+++ b/examples/llava/minicpmv-surgery.py
@@ -8,7 +8,7 @@
 args = ap.parse_args()
 
 # find the model part that includes the the multimodal projector weights
-model = AutoModel.from_pretrained(args.model, trust_remote_code=True, local_files_only=True)
+model = AutoModel.from_pretrained(args.model, trust_remote_code=True, local_files_only=True, torch_dtype=torch.bfloat16)
 checkpoint = model.state_dict()
 
 # get a list of mm tensor names

From ee3fae86d5bf08c1d8f20e6c64315f6f359415c2 Mon Sep 17 00:00:00 2001
From: caitianchi
Date: Wed, 15 Jan 2025 12:20:31 +0800
Subject: [PATCH 2/9] add readme

---
 examples/llava/README-minicpmo2.6.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/llava/README-minicpmo2.6.md b/examples/llava/README-minicpmo2.6.md
index fc65f622551e6..ea220224ac8aa 100644
--- a/examples/llava/README-minicpmo2.6.md
+++ b/examples/llava/README-minicpmo2.6.md
@@ -1,4 +1,5 @@
 ## MiniCPM-o 2.6
+Currently, this readme only covers minicpm-omni's image capabilities; full-mode support will be added as soon as possible.
 
 ### Prepare models and code
 

From ed02c70ce49a5616ddfa7848b5a91d8b61efa554 Mon Sep 17 00:00:00 2001
From: caitianchi
Date: Thu, 16 Jan 2025 13:57:36 +0800
Subject: [PATCH 3/9] update readme

---
 examples/llava/README-minicpmo2.6.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/llava/README-minicpmo2.6.md b/examples/llava/README-minicpmo2.6.md
index ea220224ac8aa..10b637eb75bc9 100644
--- a/examples/llava/README-minicpmo2.6.md
+++ b/examples/llava/README-minicpmo2.6.md
@@ -18,7 +18,7 @@ Convert the PyTorch model to gguf files (you can also download our converted [gguf](
 
 ```bash
 python ./examples/llava/minicpmv-surgery.py -m ../MiniCPM-o-2_6
-python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-o-2_6 --minicpmv-projector ../MiniCPM-o-2_6/minicpmo.projector --output-dir ../MiniCPM-o-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
+python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-o-2_6 --minicpmv-projector ../MiniCPM-o-2_6/minicpmv.projector --output-dir ../MiniCPM-o-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4
 python ./convert_hf_to_gguf.py ../MiniCPM-o-2_6/model
 
 # quantize int4 version

From 52c6abe0df092b531546564a79c9f79f6561c5b1 Mon Sep 17 00:00:00 2001
From: caitianchi
Date: Thu, 16 Jan 2025 14:11:41 +0800
Subject: [PATCH 4/9] no use make

---
 Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 18d3815b88b94..19ae0d5f1c87b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
-# ifndef LLAMA_MAKEFILE
-# $(error The Makefile build is deprecated. Use the CMake build instead. For more details, see https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
-# endif
+ifndef LLAMA_MAKEFILE
+$(error The Makefile build is deprecated. Use the CMake build instead. For more details, see https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
+endif
 
 # Define the default target now so that it is always the first target
 BUILD_TARGETS = \

From c69f9baf2b0fd692fb974496a2573eb12c379c16 Mon Sep 17 00:00:00 2001
From: caitianchi
Date: Thu, 16 Jan 2025 14:16:38 +0800
Subject: [PATCH 5/9] update readme

---
 examples/llava/README-minicpmo2.6.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/llava/README-minicpmo2.6.md b/examples/llava/README-minicpmo2.6.md
index 10b637eb75bc9..8713a43d64fd8 100644
--- a/examples/llava/README-minicpmo2.6.md
+++ b/examples/llava/README-minicpmo2.6.md
@@ -25,11 +25,12 @@ python ./convert_hf_to_gguf.py ../MiniCPM-o-2_6/model
 ./llama-quantize ../MiniCPM-o-2_6/model/ggml-model-f16.gguf ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M
 ```
 
-Build for Linux or Mac
+Build llama.cpp using `CMake`:
+https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md
 
 ```bash
-make
-make llama-minicpmv-cli
+cmake -B build
+cmake --build build --config Release
 ```
 
 Inference on Linux or Mac

From 27d0ec81eae603a42a34a7185ba483920b33f539 Mon Sep 17 00:00:00 2001
From: caitianchi
Date: Sat, 18 Jan 2025 22:12:13 +0800
Subject: [PATCH 6/9] update fix code

---
 examples/llava/clip.cpp  | 12 ++++++++++--
 examples/llava/llava.cpp |  3 +++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 076d07cee8259..4251f884e3537 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2049,6 +2049,7 @@ static std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_imag
                 images[images.size()-1].push_back(patch);
             }
         }
+        delete refine_image;
     }
    return images;
 }
@@ -2087,6 +2088,13 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
                 clip_image_f32_free(res);
             }
         }
+        for (size_t i = 0; i < imgs.size(); ++i) {
+            for (size_t j = 0; j < imgs[i].size(); ++j) {
+                if (imgs[i][j] != nullptr) {
+                    delete imgs[i][j];
+                }
+            }
+        }
         return true;
     }
     else if (ctx->has_qwen2vl_merger) {
@@ -2525,8 +2533,8 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         //  -> https://huggingface.co/HuggingFaceM4/siglip-so400m-14-980-flash-attn2-navit/blob/d66538faeba44480d0bfaa42145eef26f9423199/modeling_siglip.py#L316
         struct ggml_tensor * positions = ggml_graph_get_tensor(gf, "positions");
         int* positions_data = (int*)malloc(ggml_nbytes(positions));
-        int bucket_coords_h[70];
-        int bucket_coords_w[70];
+        int bucket_coords_h[1024];
+        int bucket_coords_w[1024];
         for (int i = 0; i < pos_h; i++){
             bucket_coords_h[i] = std::floor(70.0*i/pos_h);
         }

diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index f8f16ab585032..2cac7933d2f2a 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -307,6 +307,9 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
         load_image_size->height = img->ny;
         clip_add_load_image_size(ctx_clip, load_image_size);
         LOG_INF("%s: load_image_size %d %d\n", __func__, load_image_size->width, load_image_size->height);
+        delete[] img_res_v.data;
+        img_res_v.size = 0;
+        img_res_v.data = nullptr;
     }
     else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
         // flat / default llava-1.5 type embedding

From 136644489660c6a268c808dee95b3adb1e954c4f Mon Sep 17 00:00:00 2001
From: caitianchi
Date: Sat, 18 Jan 2025 23:49:48 +0800
Subject: [PATCH 7/9] fix editorconfig-checker

---
 examples/llava/minicpmv-convert-image-encoder-to-gguf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/llava/minicpmv-convert-image-encoder-to-gguf.py b/examples/llava/minicpmv-convert-image-encoder-to-gguf.py
index ade04fd9c5c00..9b196757f07c9 100644
--- a/examples/llava/minicpmv-convert-image-encoder-to-gguf.py
+++ b/examples/llava/minicpmv-convert-image-encoder-to-gguf.py
@@ -558,7 +558,7 @@ def bytes_to_unicode():
 elif minicpmv_version == 4:
     emb_dim = 3584
     block_count = 27
-    
+
 default_vision_config = {
         "hidden_size": 1152,
         "image_size": 980,
@@ -577,7 +577,7 @@ def bytes_to_unicode():
 elif minicpmv_version == 4:
     vision_config = SiglipVisionConfig(**default_vision_config)
     model = SiglipVisionTransformer(vision_config)
-    
+
 processor = None
 # if model.attn_pool is not None:
 #     model.attn_pool = torch.nn.Identity()

From fee2aa687a8cb56b09143968be809a91a93f5093 Mon Sep 17 00:00:00 2001
From: caitianchi
Date: Mon, 20 Jan 2025 10:40:16 +0800
Subject: [PATCH 8/9] no change convert py

---
 convert_hf_to_gguf.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 63151346945f8..4dc9837ab2d04 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -579,7 +579,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
-        res = "qwen2"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"

From 27f5e8a10a3c8916f18b786e8d24db4c672b9891 Mon Sep 17 00:00:00 2001
From: caitianchi
Date: Tue, 21 Jan 2025 17:08:45 +0800
Subject: [PATCH 9/9] use clip_image_u8_free

---
 examples/llava/clip.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 4251f884e3537..24073c5a9b15f 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2049,7 +2049,7 @@ static std::vector<std::vector<clip_image_u8 *>> uhd_slice_image(const clip_imag
                 images[images.size()-1].push_back(patch);
             }
         }
-        delete refine_image;
+        clip_image_u8_free(refine_image);
     }
    return images;
 }
@@ -2091,7 +2091,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
         for (size_t i = 0; i < imgs.size(); ++i) {
             for (size_t j = 0; j < imgs[i].size(); ++j) {
                 if (imgs[i][j] != nullptr) {
-                    delete imgs[i][j];
+                    clip_image_u8_free(imgs[i][j]);
                 }
             }
         }
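
Note on patches 6 and 9: the leak fixes work because every slice image allocated inside `uhd_slice_image()` is now released exactly once, and patch 9 further swaps the raw `delete` for the library's own `clip_image_u8_free()`, so allocation and deallocation stay behind the same clip.cpp API. Below is a minimal sketch of that ownership pattern, assuming only the `clip_image_u8_init()`/`clip_image_u8_free()` pair declared in `examples/llava/clip.h`; the `make_slices()` helper and the slice count are hypothetical stand-ins for what `uhd_slice_image()` does, not the actual implementation.

```cpp
// Sketch only: mirrors the cleanup pattern introduced by patches 6 and 9.
#include "clip.h"   // assumed API: clip_image_u8_init(), clip_image_u8_free()

#include <vector>

// Hypothetical stand-in for uhd_slice_image(): allocates slices that the
// caller must release with clip_image_u8_free(), never with a raw delete,
// since clip_image_u8 objects are created by clip.cpp's allocator.
static std::vector<clip_image_u8 *> make_slices(int n) {
    std::vector<clip_image_u8 *> slices;
    for (int i = 0; i < n; i++) {
        slices.push_back(clip_image_u8_init());
    }
    return slices;
}

int main() {
    std::vector<clip_image_u8 *> slices = make_slices(4);
    // ... encode or preprocess each slice here, as clip_image_preprocess() does ...
    for (clip_image_u8 * slice : slices) {
        clip_image_u8_free(slice);  // matching deallocator, as in patch 9
    }
    return 0;
}
```

Pairing each allocation with the exported free function keeps allocation and deallocation on the same side of the library boundary, which is why patch 9 replaces the `delete` statements that patch 6 introduced.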