Skip to content

Commit 4755afd

Browse files
authored
llama : fix integer overflow during quantization (#6063)
1 parent 6e0438d commit 4755afd

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

llama.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -11977,7 +11977,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
 11977  11977           return new_type;
 11978  11978       }
 11979  11979
 11980        -    static int32_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
        11980 +    static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
 11981  11981       std::mutex mutex;
 11982  11982       int counter = 0;
 11983  11983       size_t new_size = 0;

0 commit comments

Comments (0)