@@ -1294,6 +1294,46 @@ public void diff_cleanupMerge(LinkedList<Diff> diffs) {
1294
1294
}
1295
1295
}
1296
1296
1297
+ /**
1298
+ * Rearrange diff boudnaries that split Unicode surrogate pairs.
1299
+ * @param diffs Linked list of diff objects
1300
+ */
1301
+ public void diff_cleanupSplitSurrogates (List <Diff > diffs ) {
1302
+ char lastEnd = 0 ;
1303
+ boolean isFirst = true ;
1304
+ HashSet <Diff > toRemove = new HashSet <Diff >();
1305
+
1306
+ for (Diff aDiff : diffs ) {
1307
+ if (aDiff .text .isEmpty ()) {
1308
+ toRemove .add (aDiff );
1309
+ continue ;
1310
+ }
1311
+
1312
+ char thisTop = aDiff .text .charAt (0 );
1313
+ char thisEnd = aDiff .text .charAt (aDiff .text .length () - 1 );
1314
+
1315
+ if (Character .isHighSurrogate (thisEnd )) {
1316
+ lastEnd = thisEnd ;
1317
+ aDiff .text = aDiff .text .substring (0 , aDiff .text .length () - 1 );
1318
+ }
1319
+
1320
+ if (!isFirst && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1321
+ aDiff .text = lastEnd + aDiff .text ;
1322
+ }
1323
+
1324
+ isFirst = false ;
1325
+
1326
+ if ( aDiff .text .isEmpty () ) {
1327
+ toRemove .add (aDiff );
1328
+ continue ;
1329
+ }
1330
+ }
1331
+
1332
+ for (Diff aDiff : toRemove ) {
1333
+ diffs .remove (aDiff );
1334
+ }
1335
+ }
1336
+
1297
1337
/**
1298
1338
* loc is a location in text1, compute and return the equivalent location in
1299
1339
* text2.
@@ -1430,31 +1470,8 @@ public int diff_levenshtein(List<Diff> diffs) {
1430
1470
*/
1431
1471
public String diff_toDelta (List <Diff > diffs ) {
1432
1472
StringBuilder text = new StringBuilder ();
1433
- char lastEnd = 0 ;
1434
- boolean isFirst = true ;
1473
+ this .diff_cleanupSplitSurrogates (diffs );
1435
1474
for (Diff aDiff : diffs ) {
1436
- if (aDiff .text .isEmpty ()) {
1437
- continue ;
1438
- }
1439
-
1440
- char thisTop = aDiff .text .charAt (0 );
1441
- char thisEnd = aDiff .text .charAt (aDiff .text .length () - 1 );
1442
-
1443
- if (Character .isHighSurrogate (thisEnd )) {
1444
- lastEnd = thisEnd ;
1445
- aDiff .text = aDiff .text .substring (0 , aDiff .text .length () - 1 );
1446
- }
1447
-
1448
- if (! isFirst && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1449
- aDiff .text = lastEnd + aDiff .text ;
1450
- }
1451
-
1452
- isFirst = false ;
1453
-
1454
- if ( aDiff .text .isEmpty () ) {
1455
- continue ;
1456
- }
1457
-
1458
1475
switch (aDiff .operation ) {
1459
1476
case INSERT :
1460
1477
try {
0 commit comments