@@ -476,9 +476,14 @@ public static function split(
476
476
int $ flags = 0 ,
477
477
bool $ captureOffset = false ,
478
478
bool $ noEmpty = false ,
479
+ bool $ utf8Offset = false ,
479
480
): array {
480
481
$ flags |= ($ captureOffset ? PREG_SPLIT_OFFSET_CAPTURE : 0 ) | ($ noEmpty ? PREG_SPLIT_NO_EMPTY : 0 ) | PREG_SPLIT_DELIM_CAPTURE ;
481
- return self ::pcre ('preg_split ' , [$ pattern , $ subject , -1 , $ flags ]);
482
+ $ m = self ::pcre ('preg_split ' , [$ pattern , $ subject , -1 , $ flags ]);
483
+ if ($ utf8Offset && ($ flags & PREG_SPLIT_OFFSET_CAPTURE )) {
484
+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
485
+ }
486
+ return $ m ;
482
487
}
483
488
484
489
@@ -493,14 +498,22 @@ public static function match(
493
498
int $ offset = 0 ,
494
499
bool $ captureOffset = false ,
495
500
bool $ unmatchedAsNull = false ,
501
+ bool $ utf8Offset = false ,
496
502
): ?array {
497
503
$ flags |= ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
504
+ if ($ utf8Offset ) {
505
+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
506
+ }
498
507
if ($ offset > strlen ($ subject )) {
499
508
return null ;
500
509
}
501
- return self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])
502
- ? $ m
503
- : null ;
510
+ if (!self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])) {
511
+ return null ;
512
+ }
513
+ if ($ utf8Offset && ($ flags & PREG_OFFSET_CAPTURE )) {
514
+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
515
+ }
516
+ return $ m ;
504
517
}
505
518
506
519
@@ -516,8 +529,12 @@ public static function matchAll(
516
529
bool $ captureOffset = false ,
517
530
bool $ unmatchedAsNull = false ,
518
531
bool $ patternOrder = false ,
532
+ bool $ utf8Offset = false ,
519
533
): array {
520
534
$ flags |= ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 ) | ($ patternOrder ? PREG_PATTERN_ORDER : 0 );
535
+ if ($ utf8Offset ) {
536
+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
537
+ }
521
538
if ($ offset > strlen ($ subject )) {
522
539
return [];
523
540
}
@@ -526,6 +543,9 @@ public static function matchAll(
526
543
($ flags & PREG_PATTERN_ORDER ) ? $ flags : ($ flags | PREG_SET_ORDER ),
527
544
$ offset ,
528
545
]);
546
+ if ($ utf8Offset && ($ flags & PREG_OFFSET_CAPTURE )) {
547
+ return self ::bytesToChars ($ subject , $ m );
548
+ }
529
549
return $ m ;
530
550
}
531
551
@@ -540,12 +560,16 @@ public static function replace(
540
560
int $ limit = -1 ,
541
561
bool $ captureOffset = false ,
542
562
bool $ unmatchedAsNull = false ,
563
+ bool $ utf8Offset = false ,
543
564
): string {
544
565
if (is_object ($ replacement ) || is_array ($ replacement )) {
545
566
if (!is_callable ($ replacement , false , $ textual )) {
546
567
throw new Nette \InvalidStateException ("Callback ' $ textual' is not callable. " );
547
568
}
548
569
$ flags = ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
570
+ if ($ utf8Offset && $ captureOffset ) {
571
+ $ replacement = fn ($ m ) => $ replacement (self ::bytesToChars ($ subject , [$ m ])[0 ]);
572
+ }
549
573
return self ::pcre ('preg_replace_callback ' , [$ pattern , $ replacement , $ subject , $ limit , 0 , $ flags ]);
550
574
551
575
} elseif (is_array ($ pattern ) && is_string (key ($ pattern ))) {
@@ -557,6 +581,22 @@ public static function replace(
557
581
}
558
582
559
583
584
/**
 * Rewrites the byte offsets in offset-capturing preg_* results as character offsets.
 *
 * Every entry of every group is expected to be a [text, byteOffset] pair, which is
 * what the callers in this class pass when PREG_OFFSET_CAPTURE is set.
 * NOTE(review): assumes self::length() returns the number of characters in a
 * string — confirm against its definition.
 *
 * @param  string  $s       subject string the byte offsets refer to
 * @param  array   $groups  list of match arrays (callers wrap a single match array as [$m])
 * @return array   same structure, with each non-negative offset replaced by its character offset
 */
private static function bytesToChars(string $s, array $groups): array
{
	// Running cursor: $lastChars is the character offset that corresponds to byte offset $lastBytes.
	$lastBytes = $lastChars = 0;
	foreach ($groups as $groupKey => $matches) {
		foreach ($matches as $matchKey => $match) {
			$bytes = $match[1];
			if ($bytes < 0) {
				// preg_* reports unmatched subpatterns with offset -1; keep that marker untouched.
				continue;
			}

			if ($bytes > $lastBytes) {
				// Moving forward: count only the characters between the cursor and the new offset.
				$lastChars += self::length(substr($s, $lastBytes, $bytes - $lastBytes));
			} elseif ($bytes < $lastBytes) {
				// Offsets are not always ascending (e.g. PREG_PATTERN_ORDER lists each group
				// from the start of the subject again), so the cursor must be able to move back.
				$lastChars -= self::length(substr($s, $bytes, $lastBytes - $bytes));
			}

			$lastBytes = $bytes;
			// Write through keys instead of by-reference iteration so no dangling
			// references remain inside the returned array.
			$groups[$groupKey][$matchKey][1] = $lastChars;
		}
	}

	return $groups;
}
598
+
599
+
560
600
/** @internal */
561
601
public static function pcre (string $ func , array $ args )
562
602
{
0 commit comments