Skip to content

Commit 785620d

Browse files
committed
Oops: the Ssurgeon CombineMWT operation was joining nodes with their default toString(), which captured the tag as well as the word; build the combined MWT text from each node's word() instead
1 parent 010a955 commit 785620d

File tree

2 files changed

+54
-1
lines changed

2 files changed

+54
-1
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/CombineMWT.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,11 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
7777

7878
String newWord = this.word;
7979
if (newWord == null || newWord.equals("")) {
80-
newWord = StringUtils.join(nodes, "");
80+
StringBuilder newWordBuilder = new StringBuilder();
81+
for (IndexedWord node : nodes) {
82+
newWordBuilder.append(node.word());
83+
}
84+
newWord = newWordBuilder.toString();
8185
}
8286

8387
boolean changed = false;

test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java

+49
Original file line numberDiff line numberDiff line change
@@ -1525,6 +1525,55 @@ public void readXMLCombineMWT() {
15251525
assertNull(vertex.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class));
15261526
assertNull(vertex.get(CoreAnnotations.MWTTokenTextAnnotation.class));
15271527
}
1528+
1529+
1530+
// This time, we put tags on the words... check that a bug in the
1531+
// initial implementation is fixed
1532+
mwt = String.join(newline,
1533+
"<ssurgeon-pattern-list>",
1534+
" <ssurgeon-pattern>",
1535+
" <uid>38</uid>",
1536+
" <notes>Edit a node's MWT</notes>",
1537+
" <semgrex>" + XMLUtils.escapeXML("{word:/[iI]t/}=it . {word:/'s/}=s") + "</semgrex>",
1538+
" <edit-list>CombineMWT -node it -node s</edit-list>",
1539+
" </ssurgeon-pattern>",
1540+
"</ssurgeon-pattern-list>");
1541+
patterns = inst.readFromString(mwt);
1542+
assertEquals(patterns.size(), 1);
1543+
editSsurgeon = patterns.get(0);
1544+
1545+
sg = SemanticGraph.valueOf("[yours-4 nsubj> it/PRP-1 cop> 's/VBZ-2 advmod> yours-3 punct> !-5]");
1546+
1547+
// check the original values
1548+
itVertex = sg.getNodeByIndexSafe(1);
1549+
assertEquals(null, itVertex.get(CoreAnnotations.IsMultiWordTokenAnnotation.class));
1550+
assertEquals(null, itVertex.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class));
1551+
assertEquals(null, itVertex.get(CoreAnnotations.MWTTokenTextAnnotation.class));
1552+
sVertex = sg.getNodeByIndexSafe(2);
1553+
assertEquals(null, sVertex.get(CoreAnnotations.IsMultiWordTokenAnnotation.class));
1554+
assertEquals(null, sVertex.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class));
1555+
assertEquals(null, sVertex.get(CoreAnnotations.MWTTokenTextAnnotation.class));
1556+
1557+
newSG = editSsurgeon.iterate(sg).first;
1558+
// the high level graph structure won't change
1559+
expected = SemanticGraph.valueOf("[yours-4 nsubj> it-1 cop> 's-2 advmod> yours-3 punct> !-5]");
1560+
assertEquals(expected, newSG);
1561+
1562+
// check the updates
1563+
itVertex = newSG.getNodeByIndexSafe(1);
1564+
assertTrue(itVertex.get(CoreAnnotations.IsMultiWordTokenAnnotation.class));
1565+
assertTrue(itVertex.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class));
1566+
assertEquals("it's", itVertex.get(CoreAnnotations.MWTTokenTextAnnotation.class));
1567+
sVertex = newSG.getNodeByIndexSafe(2);
1568+
assertTrue(sVertex.get(CoreAnnotations.IsMultiWordTokenAnnotation.class));
1569+
assertFalse(sVertex.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class));
1570+
assertEquals("it's", sVertex.get(CoreAnnotations.MWTTokenTextAnnotation.class));
1571+
for (int i = 3; i <= 5; ++i) {
1572+
IndexedWord vertex = newSG.getNodeByIndexSafe(i);
1573+
assertNull(vertex.get(CoreAnnotations.IsMultiWordTokenAnnotation.class));
1574+
assertNull(vertex.get(CoreAnnotations.IsFirstWordOfMWTAnnotation.class));
1575+
assertNull(vertex.get(CoreAnnotations.MWTTokenTextAnnotation.class));
1576+
}
15281577
}
15291578

15301579

0 commit comments

Comments
 (0)