Skip to content

Commit cf97e36

Browse files
committed
Add the ability to reuse indices in SemanticGraph.valueOf. This possibly changes the meaning of existing expressions, since it was previously possible to assign multiple words to the same index, but that was a bad feature anyway.
1 parent 8eca9c2 commit cf97e36

File tree

2 files changed

+60
-11
lines changed

2 files changed

+60
-11
lines changed

src/edu/stanford/nlp/semgraph/SemanticGraph.java

+12-11
Original file line numberDiff line numberDiff line change
@@ -1688,10 +1688,13 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) {
16881688
* dumb, could be made more sophisticated.
16891689
* <br>
16901690
*
1691-
* Example: "[ate subj>Bill dobj>[muffins compound>blueberry]]"
1691+
* Example: {@code [ate subj>Bill dobj>[muffins compound>blueberry]]}
16921692
* <br>
16931693
*
16941694
* This is the same format generated by toCompactString().
1695+
* <br>
1696+
* Indices are represented by a dash separated number after the word:
1697+
* {@code [ate-1 subj>Bill-2 ...}
16951698
*/
16961699
public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) {
16971700
return (new SemanticGraphParsingTask(s, language, sentIndex)).parse();
@@ -1841,7 +1844,7 @@ public SemanticGraph makeSoftCopy() {
18411844

18421845
// ============================================================================
18431846

1844-
private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]+)-([0-9]+)");
1847+
private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]*)-([0-9]+)");
18451848

18461849
/**
18471850
* This nested class is a helper for valueOf(). It represents the task of
@@ -1850,7 +1853,7 @@ public SemanticGraph makeSoftCopy() {
18501853
private static class SemanticGraphParsingTask extends StringParsingTask<SemanticGraph> {
18511854

18521855
private SemanticGraph sg;
1853-
private Set<Integer> indexesUsed = Generics.newHashSet();
1856+
private Map<Integer, IndexedWord> indexesUsed = Generics.newHashMap();
18541857
private final Language language;
18551858
private final Integer sentIndex;
18561859

@@ -1922,21 +1925,19 @@ private IndexedWord makeVertex(String word) {
19221925
} else {
19231926
index = getNextFreeIndex();
19241927
}
1925-
indexesUsed.add(index);
1926-
// Note that, despite the use of indexesUsed and getNextFreeIndex(),
1927-
// nothing is actually enforcing that no indexes are used twice. This
1928-
// could occur if some words in the string representation being parsed
1929-
// come with index markers and some do not.
1928+
if (indexesUsed.containsKey(index)) {
1929+
return indexesUsed.get(index);
1930+
}
19301931
IndexedWord ifl = new IndexedWord(null, sentIndex != null ? sentIndex : 0, index);
19311932
// log.info("SemanticGraphParsingTask>>> word = " + word);
19321933
// log.info("SemanticGraphParsingTask>>> index = " + index);
1933-
// log.info("SemanticGraphParsingTask>>> indexesUsed = " +
1934-
// indexesUsed);
1934+
// log.info("SemanticGraphParsingTask>>> indexesUsed = " + indexesUsed);
19351935
String[] wordAndTag = word.split("/");
19361936
ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]);
19371937
ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]);
19381938
if (wordAndTag.length > 1)
19391939
ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]);
1940+
indexesUsed.put(index, ifl);
19401941
return ifl;
19411942
}
19421943

@@ -1953,7 +1954,7 @@ private static Pair<String, Integer> readWordAndIndex(String word) {
19531954

19541955
private Integer getNextFreeIndex() {
19551956
int i = 0;
1956-
while (indexesUsed.contains(i))
1957+
while (indexesUsed.containsKey(i))
19571958
i++;
19581959
return i;
19591960
}

test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java

+48
Original file line numberDiff line numberDiff line change
@@ -347,4 +347,52 @@ public void testValueOfIndices() {
347347
assertEquals(sg.getParentsWithReln(E, "obj").size(), 1);
348348
assertEquals(sg.getParentsWithReln(E, "dep").size(), 0);
349349
}
350+
351+
/**
352+
* Test the vertices and edges if we reuse some indices in valueOf
353+
*/
354+
public void testValueOfReuseIndices() {
355+
SemanticGraph sg = SemanticGraph.valueOf("[A/foo-0 obj> B/bar-1 obj> C/foo-2 obj> -2 dep> B/bar-1 nsubj> [D/bar-3 obj> E/baz-4]]");
356+
357+
List<IndexedWord> words = sg.vertexListSorted();
358+
assertEquals(words.size(), 5);
359+
360+
for (int i = 0; i < 5; ++i) {
361+
assertEquals(words.get(i).index(), i);
362+
}
363+
IndexedWord A = words.get(0);
364+
IndexedWord B = words.get(1);
365+
IndexedWord C = words.get(2);
366+
IndexedWord D = words.get(3);
367+
IndexedWord E = words.get(4);
368+
369+
assertEquals(A.word(), "A");
370+
assertEquals(A.tag(), "foo");
371+
assertEquals(B.word(), "B");
372+
assertEquals(B.tag(), "bar");
373+
assertEquals(C.word(), "C");
374+
assertEquals(C.tag(), "foo");
375+
assertEquals(D.word(), "D");
376+
assertEquals(D.tag(), "bar");
377+
assertEquals(E.word(), "E");
378+
assertEquals(E.tag(), "baz");
379+
380+
assertEquals(sg.getAllEdges(A, B).size(), 2);
381+
assertEquals(sg.getParentsWithReln(B, "obj").size(), 1);
382+
assertEquals(sg.getParentsWithReln(B, "dep").size(), 1);
383+
384+
assertEquals(sg.getAllEdges(A, C).size(), 2);
385+
assertEquals(sg.getParentsWithReln(C, "obj").size(), 1);
386+
387+
assertEquals(sg.getAllEdges(A, D).size(), 1);
388+
assertEquals(sg.getParentsWithReln(D, "nsubj").size(), 1);
389+
assertEquals(sg.getParentsWithReln(D, "obj").size(), 0);
390+
assertEquals(sg.getParentsWithReln(D, "dep").size(), 0);
391+
392+
assertEquals(sg.getAllEdges(A, E).size(), 0);
393+
assertEquals(sg.getAllEdges(D, E).size(), 1);
394+
assertEquals(sg.getParentsWithReln(E, "obj").size(), 1);
395+
assertEquals(sg.getParentsWithReln(E, "dep").size(), 0);
396+
}
397+
350398
}

0 commit comments

Comments
 (0)