Skip to content

Commit a606afa

Browse files
committed
Include {} as punct words, as some parsers don't produce LCB/RCB
1 parent 56cd6bb commit a606afa

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

src/edu/stanford/nlp/trees/PennTreebankLanguagePack.java

+1 -1
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ public PennTreebankLanguagePack() {
29 29
private static final String[] collinsPunctTags = {"''", "``", ".", ":", ","};
30 30

31 31
// new tokenizers should return (), old tokenizers return -LRB- -RRB-. so we anticipate both
32-
private static final String[] pennPunctWords = {"''", "'", "``", "`", "-LRB-", "-RRB-", "(", ")", "-LCB-", "-RCB-", ".", "?", "!", ",", ":", "-", "--", "...", ";"};
32+
private static final String[] pennPunctWords = {"{", "}", "''", "'", "``", "`", "-LRB-", "-RRB-", "(", ")", "-LCB-", "-RCB-", ".", "?", "!", ",", ":", "-", "--", "...", ";"};
33 33

34 34
private static final String[] pennSFPunctWords = {".", "!", "?"};
35 35

0 commit comments

Comments
 (0)