File tree Expand file tree Collapse file tree 1 file changed +8
-1
lines changed
src/edu/stanford/nlp/pipeline Expand file tree Collapse file tree 1 file changed +8
-1
lines changed Original file line number Diff line number Diff line change @@ -356,8 +356,15 @@ private static void setTokenBeginTokenEnd(List<CoreLabel> tokensList) {
356356 */
357357 private static void setNewlineStatus (List <CoreLabel > tokensList ) {
358358 // label newlines
359+ // TODO: could look to see if the original text was exactly *NL*,
360+ // in which case we don't want to do this. Could even check that
361+ // length == 4 as an optimization. This will involve checking
362+ // the sentence splitter to make sure all comparisons to
363+ // NEWLINE_TOKEN respect IsNewlineAnnotation
364+ // What didn't work was checking if length was 1, since that
365+ // runs afoul of two-character Windows newlines (\r\n)...
359366 for (CoreLabel token : tokensList ) {
360- if (token .word ().equals (AbstractTokenizer .NEWLINE_TOKEN ) && ( token . endPosition () - token . beginPosition () == 1 ) )
367+ if (token .word ().equals (AbstractTokenizer .NEWLINE_TOKEN ))
361368 token .set (CoreAnnotations .IsNewlineAnnotation .class , true );
362369 else
363370 token .set (CoreAnnotations .IsNewlineAnnotation .class , false );
You can’t perform that action at this time.
0 commit comments