Skip to content

Commit 880f579

Browse files
authored
llama : fix integer overflow during quantization (#4284)
happens with multi-threaded quantization of Qwen-72B

ggml-ci
1 parent 8d6d9f0 commit 880f579

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

llama.cpp

+13 -10
Original file line number | Diff line number | Diff line change
@@ -7655,18 +7655,21 @@ static void llama_convert_tensor_internal(
76557655
return;
76567656
}
76577657

7658-
auto block_size = tensor->type == GGML_TYPE_F16 ? 1 : (size_t)ggml_blck_size(tensor->type);
7659-
auto block_size_bytes = ggml_type_size(tensor->type);
7658+
size_t block_size = tensor->type == GGML_TYPE_F16 ? 1 : (size_t)ggml_blck_size(tensor->type);
7659+
size_t block_size_bytes = ggml_type_size(tensor->type);
76607660

76617661
GGML_ASSERT(nelements % block_size == 0);
7662-
auto nblocks = nelements / block_size;
7663-
auto blocks_per_thread = nblocks / nthread;
7664-
auto spare_blocks = nblocks - (blocks_per_thread * nthread); // if blocks aren't divisible by thread count
7665-
7666-
for (auto tnum = 0, in_buff_offs = 0, out_buff_offs = 0; tnum < nthread; tnum++) {
7667-
auto thr_blocks = blocks_per_thread + (tnum == nthread - 1 ? spare_blocks : 0); // num blocks for this thread
7668-
auto thr_elems = thr_blocks * block_size; // number of elements for this thread
7669-
auto thr_block_bytes = thr_blocks * block_size_bytes; // number of input bytes for this thread
7662+
size_t nblocks = nelements / block_size;
7663+
size_t blocks_per_thread = nblocks / nthread;
7664+
size_t spare_blocks = nblocks - (blocks_per_thread * nthread); // if blocks aren't divisible by thread count
7665+
7666+
size_t in_buff_offs = 0;
7667+
size_t out_buff_offs = 0;
7668+
7669+
for (int tnum = 0; tnum < nthread; tnum++) {
7670+
size_t thr_blocks = blocks_per_thread + (tnum == nthread - 1 ? spare_blocks : 0); // num blocks for this thread
7671+
size_t thr_elems = thr_blocks * block_size; // number of elements for this thread
7672+
size_t thr_block_bytes = thr_blocks * block_size_bytes; // number of input bytes for this thread
76707673

76717674
auto compute = [qtype] (ggml_type typ, uint8_t * inbuf, float * outbuf, int nels) {
76727675
if (typ == GGML_TYPE_F16) {

0 commit comments

Comments
 (0)