Commit f6ba36d: Reduce warnings. (ggml-org#439)
1 parent: 8d90072

10 files changed: 28 additions, 36 deletions

ggml-opencl.cpp (+3, -3)

@@ -1529,7 +1529,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
                                         &queue, &ev_sgemm);

        if (status != clblast::StatusCode::kSuccess) {
-           printf("\nF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+           printf("\nF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
            GGML_ASSERT(false);
        }

@@ -1634,7 +1634,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
                                         &queue, &ev_sgemm);

        if (status != clblast::StatusCode::kSuccess) {
-           printf("\nF16 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+           printf("\nF16 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
            GGML_ASSERT(false);
        }

@@ -1754,7 +1754,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                                         &queue, events.data() + ev_idx++);

        if (status != clblast::StatusCode::kSuccess) {
-           printf("\nQF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+           printf("\nQF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
            GGML_ASSERT(false);
        }
    }
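All four `ne*` dimensions here are ggml's `int64_t`. On LP64 targets (64-bit Linux, where these warnings show up) `int64_t` is `long`, so the old `%lld` tripped `-Wformat` and `%ld` matches; the added `static_cast<int>(status)` likewise makes the `clblast::StatusCode` enum argument match `%d`. A fully portable alternative is `PRId64` from `<cinttypes>`; below is a minimal sketch of that approach (my own example, not part of this commit):

// A minimal sketch (not from this commit): portable 64-bit printing.
// PRId64 expands to the right specifier on every platform, whereas
// %ld is only correct where long is 64 bits (LP64 Linux/macOS).
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t ne00 = 4096, ne01 = 4096, ne10 = 4096, ne11 = 512; // made-up dims
    printf("dims: %" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 "\n",
           ne00, ne01, ne10, ne11);
    // Equivalent without <cinttypes>: widen each argument explicitly.
    printf("dims: %lld,%lld,%lld,%lld\n",
           (long long)ne00, (long long)ne01, (long long)ne10, (long long)ne11);
    return 0;
}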

gpttype_adapter.cpp (+6, -6)

@@ -691,7 +691,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in

        const struct rwkv_file_header & header = rwkv_ctx_v3->instance->model.header;
        const size_t n_vocab = header.n_vocab;
-       printf("\nDetected Vocab: %d",n_vocab);
+       printf("\nDetected Vocab: %zu",n_vocab);
        if(n_vocab>60000)
        {
            printf("\nUsing WORLD TOKENIZER");

@@ -729,7 +729,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
        auto statebufsiz = rwkv_v2_get_state_buffer_element_count(rwkv_ctx_v2) * sizeof(float) + padding;
        auto logitbufsiz = rwkv_v2_get_logits_buffer_element_count(rwkv_ctx_v2) * sizeof(float) + padding;

-       printf("\nRWKV old Init: State Buffer:%u, Logit Buffer:%u\n", statebufsiz, logitbufsiz);
+       printf("\nRWKV old Init: State Buffer:%lu, Logit Buffer:%lu\n", statebufsiz, logitbufsiz);
        rwkv_ctx_v2->state_out = (float *)malloc(statebufsiz);
        rwkv_ctx_v2->logits_out = (float *)malloc(logitbufsiz);
        rwkv_ctx_v2->state_in = nullptr;

@@ -757,7 +757,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
        auto statebufsiz = rwkv_get_state_buffer_element_count(rwkv_ctx_v3) * sizeof(float) + padding;
        auto logitbufsiz = rwkv_get_logits_buffer_element_count(rwkv_ctx_v3) * sizeof(float) + padding;

-       printf("\nRWKV Init: State Buffer:%u, Logit Buffer:%u\n", statebufsiz, logitbufsiz);
+       printf("\nRWKV Init: State Buffer:%lu, Logit Buffer:%lu\n", statebufsiz, logitbufsiz);
        rwkv_ctx_v3->state_out = (float *)malloc(statebufsiz);
        rwkv_ctx_v3->logits_out = (float *)malloc(logitbufsiz);
        rwkv_ctx_v3->state_in = nullptr;

@@ -1284,7 +1284,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
        //prepare banned tokens
        if(banned_token_ids.size()==0 && banned_tokens.size()>0)
        {
-           printf("\n[First Run] Banning %d token sequences...",banned_tokens.size());
+           printf("\n[First Run] Banning %zu token sequences...",banned_tokens.size());
            for(int v=0;v<n_vocab;++v)
            {
                std::string word = FileFormatTokenizeID(v,file_format);

@@ -1297,7 +1297,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                }
            }
        }
-       printf("\nBanned a total of %d tokens.\n",banned_token_ids.size());
+       printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
    }

    if(debugmode!=-1)

@@ -1337,7 +1337,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
        //print progress
        if (!startedsampling && debugmode!=-1)
        {
-           printf("\rProcessing Prompt%s (%d / %d tokens)", (blasmode ? " [BLAS]" : ""), input_consumed, embd_inp.size());
+           printf("\rProcessing Prompt%s (%d / %zu tokens)", (blasmode ? " [BLAS]" : ""), input_consumed, embd_inp.size());
        }
        fflush(stdout);
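`n_vocab`, the two container sizes, and the buffer sizes (the `auto` variables deduce `size_t` from the `sizeof(float)` arithmetic) are all `size_t`, which `%d`/`%u` do not match. `%zu` is the standard `size_t` specifier; the `%lu` chosen for the buffer sizes also compiles cleanly, but only where `size_t` is `unsigned long`. A minimal sketch with made-up values:

// A minimal sketch (not from this commit): %zu matches size_t everywhere,
// while %lu only matches where size_t happens to be unsigned long.
#include <cstdio>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> banned_tokens = {"foo", "bar"}; // made-up data
    auto statebufsiz = 1024 * sizeof(float) + 512;           // deduced as size_t
    printf("Banning %zu token sequences...\n", banned_tokens.size());
    printf("State Buffer:%zu\n", statebufsiz);
    return 0;
}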

otherarch/ggml_v1.c (-3)

@@ -1,6 +1,3 @@
-// Defines CLOCK_MONOTONIC and asprintf on Linux
-#define _GNU_SOURCE
-
 #include "ggml_v1.h"

 #if defined(_MSC_VER) || defined(__MINGW32__)
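Dropping the in-file `#define _GNU_SOURCE` most plausibly avoids a macro-redefinition warning when the build system already passes `-D_GNU_SOURCE` (this is an inference; the commit message only says it reduces warnings). If the feature-test macro were still needed in this translation unit, a guarded definition would be the usual warning-free form; a hypothetical sketch, not what this commit does:

// Hypothetical alternative (not what this commit does): guard the
// feature-test macro so a -D_GNU_SOURCE on the compiler command line
// does not trigger a macro-redefinition warning.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE // Defines CLOCK_MONOTONIC and asprintf on Linux
#endif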

otherarch/ggml_v2-opencl.cpp (+3, -3)

@@ -573,7 +573,7 @@ static void ggml_v2_cl_mul_mat_f32(const ggml_v2_tensor * src0, const ggml_v2_te
                                         &queue, &ev_sgemm);

        if (status != clblast::StatusCode::kSuccess) {
-           printf("\nF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+           printf("\nF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
            GGML_V2_ASSERT(false);
        }

@@ -672,7 +672,7 @@ static void ggml_v2_cl_mul_mat_f16(const ggml_v2_tensor * src0, const ggml_v2_te
                                         &queue, &ev_sgemm);

        if (status != clblast::StatusCode::kSuccess) {
-           printf("\nF16 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+           printf("\nF16 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
            GGML_V2_ASSERT(false);
        }

@@ -780,7 +780,7 @@ static void ggml_v2_cl_mul_mat_q_f32(const ggml_v2_tensor * src0, const ggml_v2_
                                         &queue, &ev_sgemm);

        if (status != clblast::StatusCode::kSuccess) {
-           printf("\nQF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+           printf("\nQF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
            GGML_V2_ASSERT(false);
        }
    }

otherarch/ggml_v2.c (-3)

@@ -1,6 +1,3 @@
-// Defines CLOCK_MONOTONIC on Linux
-#define _GNU_SOURCE
-
 #include "ggml_v2.h"

 #if defined(_MSC_VER) || defined(__MINGW32__)

otherarch/gpt2_v2.cpp (+4, -4; the blank -/+ pairs are whitespace-only changes)

@@ -150,7 +150,7 @@ ModelLoadResult gpt2_v2_model_load(const std::string & fname, gpt2_v2_model & mo
        params.mem_size = ctx_size;
        params.mem_buffer = NULL;
        params.no_alloc = false;
-
+

        model.ctx = ggml_v2_init(params);
        if (!model.ctx) {

@@ -237,7 +237,7 @@ ModelLoadResult gpt2_v2_model_load(const std::string & fname, gpt2_v2_model & mo

        const int n_mem = n_layer*n_ctx;
        const int n_elements = n_embd*n_mem;
-
+
        model.memory_k = ggml_v2_new_tensor_1d(ctx, memory_type, n_elements*1.5);
        model.memory_v = ggml_v2_new_tensor_1d(ctx, memory_type, n_elements*1.5);

@@ -287,7 +287,7 @@ ModelLoadResult gpt2_v2_model_load(const std::string & fname, gpt2_v2_model & mo
        }

        if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-           fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%lld, %lld], expected [%lld, %lld]\n",
+           fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%ld, %ld], expected [%d, %d]\n",
                    __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
            return ModelLoadResult::FAIL;
        }

@@ -379,7 +379,7 @@ bool gpt2_v2_eval(
        params.mem_size = buf_size;
        params.mem_buffer = buf;
        params.no_alloc = false;
-
+

        struct ggml_v2_context * ctx0 = ggml_v2_init(params);
        struct ggml_v2_cgraph gf = {};
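Note that the two argument pairs in the reworked shape message now get different specifiers because they have different types: `tensor->ne[]` is ggml's `int64_t`, while the local `ne[]` parsed from the model file is a 32-bit `int`, so the old `expected [%lld, %lld]` was itself mismatched. A minimal sketch of the same situation, with made-up names and values:

// A minimal sketch (not from this commit): arguments of different types
// need different specifiers within the same call.
#include <cstdint>
#include <cstdio>

int main() {
    int64_t got[2]      = {4096, 50257}; // like ggml's int64_t tensor->ne[]
    int32_t expected[2] = {4096, 50400}; // like the int ne[] read from the file
    fprintf(stderr, "got [%lld, %lld], expected [%d, %d]\n",
            (long long)got[0], (long long)got[1], expected[0], expected[1]);
    return 0;
}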

otherarch/gptj_v2.cpp (+5, -5; the blank -/+ pairs are whitespace-only changes)

@@ -150,7 +150,7 @@ ModelLoadResult gptj_v2_model_load(const std::string & fname, gptj_v2_model & mo
        params.mem_size = ctx_size;
        params.mem_buffer = NULL;
        params.no_alloc = false;
-
+

        model.ctx = ggml_v2_init(params);
        if (!model.ctx) {

@@ -281,7 +281,7 @@ ModelLoadResult gptj_v2_model_load(const std::string & fname, gptj_v2_model & mo
            fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
            return ModelLoadResult::FAIL;
        }
-
+

        if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {

@@ -294,11 +294,11 @@ ModelLoadResult gptj_v2_model_load(const std::string & fname, gptj_v2_model & mo
            }
            else
            {
-               fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+               fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%ld, %ld], expected [%d, %d]\n",
                        __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
                return ModelLoadResult::FAIL;
            }
-
+
        }

        // for debugging

@@ -387,7 +387,7 @@ bool gptj_v2_eval(
        params.mem_size = buf_size;
        params.mem_buffer = buf;
        params.no_alloc = false;
-
+

        struct ggml_v2_context * ctx0 = ggml_v2_init(params);
        struct ggml_v2_cgraph gf = {};

otherarch/gptj_v3.cpp (+1, -1)

@@ -304,7 +304,7 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
            }
            else
            {
-               fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+               fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%ld, %ld], expected [%d, %d]\n",
                        __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
                return ModelLoadResult::FAIL;
            }

otherarch/llama_v3.h (+4, -6)

@@ -243,10 +243,9 @@ extern "C" {
    // Various functions for loading a ggml llama model.
    // Allocate (almost) all memory needed for the model.
    // Return NULL on failure
-   LLAMA_V3_API DEPRECATED(struct llama_v3_context * llama_v3_init_from_file(
+   LLAMA_V3_API struct llama_v3_context * llama_v3_init_from_file(
                            const char * path_model,
-                           struct llama_v3_context_params params),
-           "please use llama_v3_load_model_from_file combined with llama_v3_new_context_with_model instead");
+                           struct llama_v3_context_params params);

    // Frees all allocated memory
    LLAMA_V3_API void llama_v3_free(struct llama_v3_context * ctx);

@@ -263,12 +262,11 @@ extern "C" {
    // The model needs to be reloaded before applying a new adapter, otherwise the adapter
    // will be applied on top of the previous one
    // Returns 0 on success
-   LLAMA_V3_API DEPRECATED(int llama_v3_apply_lora_from_file(
+   LLAMA_V3_API int llama_v3_apply_lora_from_file(
            struct llama_v3_context * ctx,
            const char * path_lora,
            const char * path_base_model,
-           int n_threads),
-           "please use llama_v3_model_apply_lora_from_file instead");
+           int n_threads);

    LLAMA_V3_API int llama_v3_model_apply_lora_from_file(
            const struct llama_v3_model * model,
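Un-wrapping these two declarations silences `-Wdeprecated-declarations` at the call sites inside the codebase that still use them. For reference, here is a sketch of the usual shape of such a `DEPRECATED` wrapper (assumed, not copied from this header):

// Sketch of a typical DEPRECATED wrapper (an assumption, not this header's
// exact macro). Any call to a declaration wrapped this way emits a
// -Wdeprecated-declarations warning carrying the hint text.
#if defined(__GNUC__) || defined(__clang__)
#    define DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
#elif defined(_MSC_VER)
#    define DEPRECATED(func, hint) __declspec(deprecated(hint)) func
#else
#    define DEPRECATED(func, hint) func
#endif

DEPRECATED(int old_api(void), "please use new_api instead");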

otherarch/rwkv_v2.cpp (+2, -2)

@@ -367,8 +367,8 @@ struct rwkv_v2_context * rwkv_v2_init_from_file(const char * file_path, uint32_t
    // Verify order of dimensions
    struct ggml_v2_tensor * emb = model->emb;
    RWKV_V2_ASSERT_NULL(emb->n_dims == 2, "Unexpected dimension count of embedding matrix %d", emb->n_dims);
-   RWKV_V2_ASSERT_NULL(emb->ne[0] == model->n_embed, "Unexpected dimension of embedding matrix %lld", emb->ne[0]);
-   RWKV_V2_ASSERT_NULL(emb->ne[1] == model->n_vocab, "Unexpected dimension of embedding matrix %lld", emb->ne[1]);
+   RWKV_V2_ASSERT_NULL(emb->ne[0] == model->n_embed, "Unexpected dimension of embedding matrix %ld", emb->ne[0]);
+   RWKV_V2_ASSERT_NULL(emb->ne[1] == model->n_vocab, "Unexpected dimension of embedding matrix %ld", emb->ne[1]);

    int32_t n_embed = model->n_embed;
    int32_t n_layer = model->n_layer;
