@@ -7655,18 +7655,21 @@ static void llama_convert_tensor_internal(
         return;
     }
 
-    auto block_size = tensor->type == GGML_TYPE_F16 ? 1 : (size_t)ggml_blck_size(tensor->type);
-    auto block_size_bytes = ggml_type_size(tensor->type);
+    size_t block_size = tensor->type == GGML_TYPE_F16 ? 1 : (size_t)ggml_blck_size(tensor->type);
+    size_t block_size_bytes = ggml_type_size(tensor->type);
 
     GGML_ASSERT(nelements % block_size == 0);
-    auto nblocks = nelements / block_size;
-    auto blocks_per_thread = nblocks / nthread;
-    auto spare_blocks = nblocks - (blocks_per_thread * nthread); // if blocks aren't divisible by thread count
-
-    for (auto tnum = 0, in_buff_offs = 0, out_buff_offs = 0; tnum < nthread; tnum++) {
-        auto thr_blocks = blocks_per_thread + (tnum == nthread - 1 ? spare_blocks : 0); // num blocks for this thread
-        auto thr_elems = thr_blocks * block_size; // number of elements for this thread
-        auto thr_block_bytes = thr_blocks * block_size_bytes; // number of input bytes for this thread
+    size_t nblocks = nelements / block_size;
+    size_t blocks_per_thread = nblocks / nthread;
+    size_t spare_blocks = nblocks - (blocks_per_thread * nthread); // if blocks aren't divisible by thread count
+
+    size_t in_buff_offs = 0;
+    size_t out_buff_offs = 0;
+
+    for (int tnum = 0; tnum < nthread; tnum++) {
+        size_t thr_blocks = blocks_per_thread + (tnum == nthread - 1 ? spare_blocks : 0); // num blocks for this thread
+        size_t thr_elems = thr_blocks * block_size; // number of elements for this thread
+        size_t thr_block_bytes = thr_blocks * block_size_bytes; // number of input bytes for this thread
 
         auto compute = [qtype] (ggml_type typ, uint8_t * inbuf, float * outbuf, int nels) {
             if (typ == GGML_TYPE_F16) {
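A minimal standalone sketch (not part of the commit) of the type-deduction point behind hoisting the offsets out of the for-init: with `auto tnum = 0, in_buff_offs = 0, out_buff_offs = 0` all three variables share one deduced type (int), whereas the new form lets the byte offsets keep their own size_t type. Variable names mirror the diff; the increment values are placeholders, not the real thr_block_bytes / thr_elems arithmetic.

#include <cstddef>
#include <cstdio>
#include <type_traits>

int main() {
    // Old style: one for-init declaration, so every variable gets the same
    // deduced type -- here int, including the buffer offsets.
    for (auto tnum = 0, in_buff_offs = 0, out_buff_offs = 0; tnum < 1; tnum++) {
        static_assert(std::is_same<decltype(in_buff_offs), int>::value,
                      "offsets share the loop counter's int type");
        (void)out_buff_offs;
    }

    // New style: offsets declared outside the loop keep their own size_t type,
    // while the loop counter stays a plain int.
    std::size_t in_buff_offs  = 0;
    std::size_t out_buff_offs = 0;
    for (int tnum = 0; tnum < 4; tnum++) {
        in_buff_offs  += 1024; // placeholder for thr_block_bytes
        out_buff_offs += 4096; // placeholder for thr_elems * sizeof(float)
    }
    std::printf("in_buff_offs=%zu out_buff_offs=%zu\n", in_buff_offs, out_buff_offs);
    return 0;
}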