@@ -1688,10 +1688,13 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) {
16881688 * dumb, could be made more sophisticated.
16891689 * <br>
16901690 *
1691- * Example: " [ate subj>Bill dobj>[muffins compound>blueberry]]"
1691+ * Example: {@code [ate subj>Bill dobj>[muffins compound>blueberry]]}
16921692 * <br>
16931693 *
16941694 * This is the same format generated by toCompactString().
1695+ * <br>
1696+ * Indices are represented by a dash-separated number after the word:
1697+ * {@code [ate-1 subj>Bill-2 ...]}
16951698 */
16961699 public static SemanticGraph valueOf (String s , Language language , Integer sentIndex ) {
16971700 return (new SemanticGraphParsingTask (s , language , sentIndex )).parse ();
@@ -1841,7 +1844,7 @@ public SemanticGraph makeSoftCopy() {
18411844
18421845 // ============================================================================
18431846
1844- private static final Pattern WORD_AND_INDEX_PATTERN = Pattern .compile ("([^-]+ )-([0-9]+)" );
1847+ private static final Pattern WORD_AND_INDEX_PATTERN = Pattern .compile ("([^-]* )-([0-9]+)" );
18451848
18461849 /**
18471850 * This nested class is a helper for valueOf(). It represents the task of
@@ -1850,7 +1853,7 @@ public SemanticGraph makeSoftCopy() {
18501853 private static class SemanticGraphParsingTask extends StringParsingTask <SemanticGraph > {
18511854
18521855 private SemanticGraph sg ;
1853- private Set <Integer > indexesUsed = Generics .newHashSet ();
1856+ private Map <Integer , IndexedWord > indexesUsed = Generics .newHashMap ();
18541857 private final Language language ;
18551858 private final Integer sentIndex ;
18561859
@@ -1922,21 +1925,19 @@ private IndexedWord makeVertex(String word) {
19221925 } else {
19231926 index = getNextFreeIndex ();
19241927 }
1925- indexesUsed .add (index );
1926- // Note that, despite the use of indexesUsed and getNextFreeIndex(),
1927- // nothing is actually enforcing that no indexes are used twice. This
1928- // could occur if some words in the string representation being parsed
1929- // come with index markers and some do not.
1928+ if (indexesUsed .containsKey (index )) {
1929+ return indexesUsed .get (index );
1930+ }
19301931 IndexedWord ifl = new IndexedWord (null , sentIndex != null ? sentIndex : 0 , index );
19311932 // log.info("SemanticGraphParsingTask>>> word = " + word);
19321933 // log.info("SemanticGraphParsingTask>>> index = " + index);
1933- // log.info("SemanticGraphParsingTask>>> indexesUsed = " +
1934- // indexesUsed);
1934+ // log.info("SemanticGraphParsingTask>>> indexesUsed = " + indexesUsed);
19351935 String [] wordAndTag = word .split ("/" );
19361936 ifl .set (CoreAnnotations .TextAnnotation .class , wordAndTag [0 ]);
19371937 ifl .set (CoreAnnotations .ValueAnnotation .class , wordAndTag [0 ]);
19381938 if (wordAndTag .length > 1 )
19391939 ifl .set (CoreAnnotations .PartOfSpeechAnnotation .class , wordAndTag [1 ]);
1940+ indexesUsed .put (index , ifl );
19401941 return ifl ;
19411942 }
19421943
@@ -1953,7 +1954,7 @@ private static Pair<String, Integer> readWordAndIndex(String word) {
19531954
19541955 private Integer getNextFreeIndex () {
19551956 int i = 0 ;
1956- while (indexesUsed .contains (i ))
1957+ while (indexesUsed .containsKey (i ))
19571958 i ++;
19581959 return i ;
19591960 }
0 commit comments