@@ -4086,7 +4086,7 @@ struct ggml_tensor * ggml_mul_mat_id(
     GGML_ASSERT(ids->ne[1] == b->ne[1]);
     GGML_ASSERT(ids->ne[2] == b->ne[2] && ids->ne[3] == b->ne[3]);
     GGML_ASSERT(n_as > 0 && n_as <= GGML_MAX_SRC - 2);
-    GGML_ASSERT(id >= 0 && id < n_as);
+    GGML_ASSERT(id >= 0 && id < ids->ne[0]);

     bool is_node = false;

@@ -10345,7 +10345,7 @@ static void ggml_compute_forward_get_rows_q(
     GGML_TENSOR_BINARY_OP_LOCALS

     const int64_t nc = ne00;
-    const int64_t nr = ggml_nelements(src1);
+    const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr);

     const enum ggml_type type = src0->type;
     ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
@@ -10356,14 +10356,16 @@ static void ggml_compute_forward_get_rows_q(
     assert(ggml_nrows(dst) == nr);

     // TODO: multi-thread
-    for (int64_t i = 0; i < nr; ++i) {
-        const int64_t r = ((int32_t *) src1->data)[i];
-
-        const int64_t i02 = i/ne10;
+    for (int64_t i12 = 0; i12 < ne12; ++i12) {
+        for (int64_t i11 = 0; i11 < ne11; ++i11) {
+            for (int64_t i10 = 0; i10 < ne10; ++i10) {
+                const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);

-        dequantize_row_q(
-                (const void *) ((char *) src0->data + i02*nb02 + r*nb01),
-                     (float *) ((char *) dst->data + i*nb1), nc);
+                dequantize_row_q(
+                        (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
+                             (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
+            }
+        }
     }
 }

@@ -10381,22 +10383,23 @@ static void ggml_compute_forward_get_rows_f16(
     GGML_TENSOR_BINARY_OP_LOCALS

     const int64_t nc = ne00;
-    const int64_t nr = ggml_nelements(src1);
+    const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr);

     assert(ne0 == nc);
     assert(ne02 == ne11);
     assert(nb00 == sizeof(ggml_fp16_t));
     assert(ggml_nrows(dst) == nr);

     // TODO: multi-thread
-    for (int64_t i = 0; i < nr; ++i) {
-        const int64_t r = ((int32_t *) src1->data)[i];
-
-        const int64_t i02 = i/ne10;
+    for (int64_t i12 = 0; i12 < ne12; ++i12) {
+        for (int64_t i11 = 0; i11 < ne11; ++i11) {
+            for (int64_t i10 = 0; i10 < ne10; ++i10) {
+                const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);

-        for (int j = 0; j < nc; ++j) {
-            ggml_fp16_t v = ((ggml_fp16_t *) ((char *) src0->data + i02*nb02 + r*nb01))[j];
-            ((float *) ((char *) dst->data + i*nb1))[j] = GGML_FP16_TO_FP32(v);
+                ggml_fp16_to_fp32_row(
+                        (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
+                             (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
+            }
         }
     }
 }
@@ -10415,14 +10418,14 @@ static void ggml_compute_forward_get_rows_f32(
     GGML_TENSOR_BINARY_OP_LOCALS

     const int64_t nc = ne00;
+    const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr);

     assert(ne0 == nc);
     assert(ne02 == ne11);
     assert(nb00 == sizeof(float));
     assert(ggml_nrows(dst) == nr);

     // TODO: multi-thread
-    // TODO: same impl for get_rows_q and get_rows_f16
     for (int64_t i12 = 0; i12 < ne12; ++i12) {
         for (int64_t i11 = 0; i11 < ne11; ++i11) {
             for (int64_t i10 = 0; i10 < ne10; ++i10) {
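For reference, the hunks above generalize the get_rows paths so that src1 is treated as a (possibly batched) 3-D tensor of row indices rather than a flat list of nr indices: the quantized and F16 paths gain the same triple loop over ne12/ne11/ne10 that the F32 path already uses (hence the removed "same impl" TODO), and each gathered row is located via the byte strides nb01/nb02/nb03. The snippet below is a minimal, dependency-free C sketch of that offset arithmetic; it is not part of the patch, and all shapes, strides, and values are made up for illustration, with the ne*/nb* names only mirroring ggml's convention for element counts and byte strides.

/*
 * Illustrative sketch only -- NOT part of the patch above.  It mimics the
 * triple-loop indexing that the patched get_rows paths use, with made-up
 * shapes; ne* / nb* below only mirror ggml's naming for element counts and
 * byte strides.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    // src0: ne00 = 3 columns, ne01 = 4 rows, ne02 = 2 batches (ne03 = 1)
    float src0[2][4][3];
    for (int b = 0; b < 2; ++b)
        for (int r = 0; r < 4; ++r)
            for (int c = 0; c < 3; ++c)
                src0[b][r][c] = 100*b + 10*r + c;

    // src1: ne10 = 2 indices per batch, ne11 = 2 batches (matching ne02,
    // cf. assert(ne02 == ne11) in the diff), ne12 = 1
    int32_t src1[1][2][2] = { { {3, 1}, {0, 2} } };

    // dst: ne0 = ne00, ne1 = ne10, ne2 = ne11, ne3 = ne12
    float dst[1][2][2][3];

    const int64_t nc   = 3;                  // ne00: elements per row
    const size_t  nb01 = sizeof(src0[0][0]); // byte stride between src0 rows
    const size_t  nb02 = sizeof(src0[0]);    // byte stride between src0 batches

    for (int64_t i12 = 0; i12 < 1; ++i12) {         // ne12
        for (int64_t i11 = 0; i11 < 2; ++i11) {     // ne11
            for (int64_t i10 = 0; i10 < 2; ++i10) { // ne10
                // row index to gather for batch i11 (the patched code reads
                // this through i10*nb10 + i11*nb11 + i12*nb12)
                const int64_t i01 = src1[i12][i11][i10];

                // source offset i01*nb01 + i11*nb02 (+ i12*nb03, zero here);
                // dst array indexing stands in for i10*nb1 + i11*nb2 + i12*nb3
                memcpy(dst[i12][i11][i10],
                       (const char *) src0 + i01*nb01 + i11*nb02,
                       nc*sizeof(float));
            }
        }
    }

    // second gathered index of batch 1 is row 2 -> first element is 120
    printf("dst[0][1][1][0] = %.0f\n", dst[0][1][1][0]);
    return 0;
}

Built with any C compiler, the sketch prints dst[0][1][1][0] = 120, i.e. the first element of row 2 gathered from batch 1, matching the i01*nb01 + i11*nb02 addressing used by the patched loops.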