Skip to content
This repository was archived by the owner on Aug 5, 2024. It is now read-only.

Commit d681ef6

Browse files
committed
Refactor Java version to use cleanupSplitSurrogates
1 parent 191b698 commit d681ef6

File tree

1 file changed

+41
-24
lines changed

1 file changed

+41
-24
lines changed

java/src/name/fraser/neil/plaintext/diff_match_patch.java

+41-24
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,46 @@ public void diff_cleanupMerge(LinkedList<Diff> diffs) {
12941294
}
12951295
}
12961296

1297+
/**
1298+
* Rearrange diff boudnaries that split Unicode surrogate pairs.
1299+
* @param diffs Linked list of diff objects
1300+
*/
1301+
public void diff_cleanupSplitSurrogates(List<Diff> diffs) {
1302+
char lastEnd = 0;
1303+
boolean isFirst = true;
1304+
HashSet<Diff> toRemove = new HashSet<Diff>();
1305+
1306+
for (Diff aDiff : diffs) {
1307+
if (aDiff.text.isEmpty()) {
1308+
toRemove.add(aDiff);
1309+
continue;
1310+
}
1311+
1312+
char thisTop = aDiff.text.charAt(0);
1313+
char thisEnd = aDiff.text.charAt(aDiff.text.length() - 1);
1314+
1315+
if (Character.isHighSurrogate(thisEnd)) {
1316+
lastEnd = thisEnd;
1317+
aDiff.text = aDiff.text.substring(0, aDiff.text.length() - 1);
1318+
}
1319+
1320+
if (!isFirst && Character.isHighSurrogate(lastEnd) && Character.isLowSurrogate(thisTop)) {
1321+
aDiff.text = lastEnd + aDiff.text;
1322+
}
1323+
1324+
isFirst = false;
1325+
1326+
if ( aDiff.text.isEmpty() ) {
1327+
toRemove.add(aDiff);
1328+
continue;
1329+
}
1330+
}
1331+
1332+
for (Diff aDiff : toRemove) {
1333+
diffs.remove(aDiff);
1334+
}
1335+
}
1336+
12971337
/**
12981338
* loc is a location in text1, compute and return the equivalent location in
12991339
* text2.
@@ -1430,31 +1470,8 @@ public int diff_levenshtein(List<Diff> diffs) {
14301470
*/
14311471
public String diff_toDelta(List<Diff> diffs) {
14321472
StringBuilder text = new StringBuilder();
1433-
char lastEnd = 0;
1434-
boolean isFirst = true;
1473+
this.diff_cleanupSplitSurrogates(diffs);
14351474
for (Diff aDiff : diffs) {
1436-
if (aDiff.text.isEmpty()) {
1437-
continue;
1438-
}
1439-
1440-
char thisTop = aDiff.text.charAt(0);
1441-
char thisEnd = aDiff.text.charAt(aDiff.text.length() - 1);
1442-
1443-
if (Character.isHighSurrogate(thisEnd)) {
1444-
lastEnd = thisEnd;
1445-
aDiff.text = aDiff.text.substring(0, aDiff.text.length() - 1);
1446-
}
1447-
1448-
if (! isFirst && Character.isHighSurrogate(lastEnd) && Character.isLowSurrogate(thisTop)) {
1449-
aDiff.text = lastEnd + aDiff.text;
1450-
}
1451-
1452-
isFirst = false;
1453-
1454-
if ( aDiff.text.isEmpty() ) {
1455-
continue;
1456-
}
1457-
14581475
switch (aDiff.operation) {
14591476
case INSERT:
14601477
try {

0 commit comments

Comments
 (0)