@@ -266,7 +266,7 @@ import edu.stanford.nlp.util.logging.Redwood;
266
266
267
267
268
268
/* * Turn on to find out how things were tokenized. */
269
- private static final boolean DEBUG = false ;
269
+ private static final boolean DEBUG = false ; // Keep false in committed code: when true, the rules guarded by if (DEBUG) emit a logger.info trace for each token they recognize.
270
270
271
271
/* * A logger for this class */
272
272
private static final Redwood . RedwoodChannels logger = Redwood . channels(PTBLexer . class);
@@ -571,13 +571,13 @@ import edu.stanford.nlp.util.logging.Redwood;
571
571
SENTEND1 = {SPACENL} ( {SPACENL} |[:uppercase:]| {SGML1} )
572
572
SENTEND2 = {SPACE} ( {SPACE} |[:uppercase:]| {SGML2} )
573
573
574
+ /* Note that JFlex doesn't support {2,} pattern form. Only {j,k}. */
574
575
DATE = {DIGIT} {1,2} [ \- \u2012 \/ ] {DIGIT} {1,2} [ \- \u2012 \/ ] {DIGIT} {2,4}| {DIGIT} {4} [ \- \u2012 \/ ] {DIGIT} {1,2} [ \- \u2012 \/ ] {DIGIT} {1,2}
575
576
/* Note that NUM also includes times like 12:55. One can start with a . or , but not a : */
576
577
NUM = {DIGIT} *( [ .,\u066B\u066C ] {DIGIT} +)+| {DIGIT} +( [ .:,\u00AD\u066B\u066C\u2009\u202F ] {DIGIT} +)*
577
578
LEADING_NUM = {DIGIT} +( [ .,\u066B\u066C ] {DIGIT} +)+
578
579
/* Now don't allow bracketed negative numbers! They have too many uses (e.g.,
579
- years or times in parentheses), and having them in tokens messes up
580
- treebank parsing.
580
+ years or times in parentheses), and having them in tokens messes up treebank parsing.
581
581
NUMBER = [\-+]?{NUM}|\({NUM}\) */
582
582
NUMBER = [ \- \u2212 +] ? {NUM}
583
583
SUBSUPNUM = [ \u207A\u207B\u208A\u208B ] ?( [ \u2070\u00B9\u00B2\u00B3\u2074 - \u2079 ] +| [ \u2080 - \u2089 ] +)
@@ -614,7 +614,7 @@ THING_LETTER = ([dDoOlL]{APOSETCETERA}[\p{Alpha}\p{Digit}])?([\p{Alpha}\p{Digit}
614
614
THINGA = [ A- Z] +(( [ +&] | {SPAMP} ) [ A- Z] +)+
615
615
THING3 = [\p{Alpha}\p{Digit}] +( -[\p{Alpha}] +){0,2}( \\ ? \/ [\p{Alpha}\p{Digit}] +( -[\p{Alpha}] +){0,2}){1,2}
616
616
APOS = [ '\u0092\u2019 ´] | &apos; /* ASCII straight quote, single right curly quote in CP1252 (wrong) or Unicode or reversed quote or HTML SGML escape (&apos;) */
617
- /* Includes extra ones that may appear inside a word, rightly or wrongly */
617
+ /* Includes extra ones that may appear inside a word, rightly or wrongly: ASCII backquote, CP1252 left curly quote, left curly quote, high upside down left curly quote */
618
618
APOSETCETERA = {APOS} | [ `\u0091\u2018\u201B ]
619
619
/* HTHING recognizes hyphenated words, including ones with various kinds of numbers in them. And with underscores. */
620
620
HTHING = [\p{Alpha}\p{Digit}][\p{Alpha}\p{Digit} .,\u00AD\u200C\u200D\u2060 ] *( [- _] ( [\p{Alpha}\p{Digit} \u00AD\u200C\u200D\u2060 ] +( \. {DIGIT} +)?| {ACRO2} \. ))+
@@ -639,21 +639,24 @@ HTHINGEXCEPTIONPREFIXED = (e|a|u|x|agro|ante|anti|arch|be|bi|bio|co|counter|cros
639
639
HTHINGEXCEPTIONSUFFIXED = ( [\p{Alpha}\p{Digit}][\p{Alpha}\p{Digit} .,\u00AD ] *)( -)( esque| ette| fest| fold| gate| itis| less| most| o-torium| rama| wise)( s| es| d| ed)?
640
640
HTHINGEXCEPTIONWHOLE = ( mm-hm| mm-mm| o-kay| uh-huh| uh-oh)( s| es| d| ed)?
641
641
642
- /* things like 'll and 'm */
643
- REDAUX = {APOSETCETERA} ( [ msdMSD ] | re| ve| ll)
642
+ /* things like 'll and 'm and 'em for them */
643
+ REDAUX = {APOSETCETERA} ( m | s | d | re| ve| ll| em )
644
644
/* For things that will have n't on the end. They can't end in 'n' */
645
645
/* \u00AD is soft hyphen. \u2060 is word joiner */
646
- SWORD = [\p{Alpha} \u00AD\u200C\u200D\u2060 ] * [ A- MO- Za- mo- z][ \u00AD\u200C\u200D\u2060 ] *
647
- SREDAUX = n{APOSETCETERA} t
648
- /* Tokens you want but already okay: C'mon 'n' '[2-9]0s '[eE]m 'till?
649
- [Yy]'all 'Cause Shi'ite B'Gosh o'clock. Here now only need apostrophe
650
- final words. */
651
- /* Note that Jflex doesn't support {2,} form. Only {2,k}. */
652
- /* [yY]' is for Y'know, y'all and I for I. So exclude from one letter first */
653
- /* Rest are for French borrowings. n allows n'ts in "don'ts" */
654
- /* Arguably, c'mon should be split to "c'm" + "on", but not yet. 'Twixt for betwixt */
655
- APOWORD = {APOS} n{APOS} ?| [ lLdDjJ] {APOS} |( Dunkin| somethin| ol) {APOS} | {APOS} em| diff{APOSETCETERA} rent| [ A- HJ- XZn] {APOSETCETERA} [:letter:]{2}[:letter:]*| {APOS} [ 1- 9] 0s| [ 1- 9] 0{APOS} s| {APOS} till?|[:letter:][:letter:]* [ aáeiouhlpyAEIOUY] {APOSETCETERA} [ aeiíoulA- Z] [:letter:]*| {APOS} cause| cont{APOSETCETERA} d\. ?| nor{APOSETCETERA} easter| c{APOSETCETERA} mon| e{APOSETCETERA} er| s{APOSETCETERA} mores| ev{APOSETCETERA} ry| li{APOSETCETERA} l| nat{APOSETCETERA} l| ass{APOSETCETERA} t| 'twixt| O{APOSETCETERA} o
656
- APOWORD2 = y{APOS}
646
+ WORD_NOT = [\p{Alpha} \u00AD\u200C\u200D\u2060 ] * [ A- MO- Za- mo- z][ \u00AD\u200C\u200D\u2060 ] *
647
+ REDAUX_NOT = n{APOSETCETERA} ts?
648
+
649
+ /* 2022 tokenizer change. We generally allow apostrophes (including curly ones) into words. This is much better for
650
+ * Hebrew, Arabic, Star Trek and some Black American names, etc. We only separate off word forms with apostrophes
651
+ * that are known common word shortenings or clitics.
652
+ */
653
+ /* Tokens you want: 'n' '[2-9]0s '[eE]m 'till? 'Cause Shi'ite B'Gosh o'clock 'Twixt
654
+ Here now only need apostrophe initial or final words listed. */
655
+ /* Single letters are for French borrowings. */
656
+ /* Arguably, c'mon should be split to "c'm" + "on", but not yet. */
657
+ APOWORD = {WORD} ( {APOSETCETERA}{WORD} )+|\p{Script=Latin} {APOSETCETERA} [ A- Z] \. ( [ A- Z] \. )+| {APOS} n{APOS} ?|( [ lLdDjJ] | Dunkin| somethin| ol) {APOS} | {APOS} ( em| till?| cause| twixt| [ 1- 9] 0s)| [ 1- 9] 0{APOS} s
658
+ /* APOWORD2 is things we will strip at beginning of word: th' shortening "the" (Th'enchanting) and y' shortening "you" (y'know, y'all) */
659
+ APOWORD2 = ( th| y) {APOS}
657
660
/* Some Wired URLs end in + or = so omit that too. Some quoting with '[' and ']' so disallow. */
658
661
FULLURL = ( ftp| svn| svn\+ ssh| http| https| mailto) :\/\/ [^ \t\n\f\r <>|`\p{OpenPunctuation}\p{InitialPunctuation}\p{ClosePunctuation}\p{FinalPunctuation}] + [^ \t\n\f\r <>|.!?¡¿,·;:&`\"\'\* \p{OpenPunctuation}\p{InitialPunctuation}\p{ClosePunctuation}\p{FinalPunctuation}-]
659
662
LIKELYURL = (( www\. ( [^ \t\n\f\r `<>|.!?,\p{OpenPunctuation}\p{InitialPunctuation}\p{ClosePunctuation}\p{FinalPunctuation}] + \. )+ [ a- zA- Z] {2,4})|(( [^ \t\n\f\r `<>|.!?,:\/ $\p{OpenPunctuation}\p{InitialPunctuation}\p{ClosePunctuation}\p{FinalPunctuation}] + \. )+( com| net| org| edu)))( \/ [^ \t\n\f\r `<>|] + [^ \t\n\f\r `<>|.!?,;:&\p{OpenPunctuation}\p{InitialPunctuation}\p{ClosePunctuation}\p{FinalPunctuation}-] )?
@@ -769,26 +772,34 @@ INSENTP = [,;:\u3001\u0F0D]
769
772
QUOTES = {APOS} | [ `\u2018 - \u201F\u0082\u0084\u0091 - \u0094\u2039\u203A\u00AB\u00BB ] {1,2}
770
773
/* Double quote: the ASCII double quote, its HTML escape (&quot;), or a single-quote-like character immediately followed by an ASCII apostrophe. */
DBLQUOT = \" | &quot;| [ `'\u0091\u0092\u2018\u2019 ] '
771
774
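/* Cap'n (captain) and c'est (French) were formerly listed in TBSPEC; they are no longer special-cased there now that {APOWORD} accepts word-internal apostrophes. */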
772
- TBSPEC = -( RRB| LRB| RCB| LCB| RSB| LSB) -| C\. D\. s| pro-| anti-| S( &| &) P-500| S( &| &) Ls| Cap {APOS} n | c {APOS} est
775
+ /* Treebank special tokens: bracket escapes such as -LRB-, C.D.s, pro-, anti-, and S&P-500 / S&Ls with the ampersand either literal or HTML-escaped (&amp;). */
+ TBSPEC = -( RRB| LRB| RCB| LCB| RSB| LSB) -| C\. D\. s| pro-| anti-| S( &| &amp;) P-500| S( &| &amp;) Ls
773
776
SWEARING = f[- *][- c*] k( in[ g'] ?| e[ dr] )?| f[- *] ( in[ g'] ?| e[ dr] )|( bull| dip)? s[ h@][- \* #] t( ty| e| box| s)?| c[- *] nts?| p[- *] ss( e[ sd] | ing)?| c[- *] ck| b[- *] tch| t[- *] ts| tw[- *] ts?| cr[- *] p| d[- *] cks?| b[- *][- *s] t[- *] rds?| pr[- *] ck| d[- *] mn| bl[- *] {2,2} dy
774
777
TBSPEC2 = {APOS} [ 0- 9][ 0- 9]
775
778
BANGWORDS = ( E| Yahoo| Jeopardy) \!
776
779
BANGMAGAZINES = OK\!
777
780
781
+ /* Allows covid-19 variants and other similar things. Must filter out first p.500, No.17, etc. */
782
+ CAP_NUM_REST = [ 0- 9] +( \. [ 0- 9] +)* [ A- Za- z] *
783
+ /* One or two groups of capital letters, each followed by a period, then {CAP_NUM_REST}. The optional second group must be a character class [A-Z], not the literal text "A-Z]". */
+ CAP_NUM = [ A- Z] + \. ( [ A- Z] + \. )? {CAP_NUM_REST}
784
+
778
785
/* Smileys (based on Chris Potts' sentiment tutorial, but much more restricted set - e.g., no "8)", "do:" or "):", too ambiguous) and simple Asian smileys */
779
786
SMILEY = [ <>] ? [ :;=][ \- o\* '] ? [ \(\) DPdpO\\ {@\|\[\] ]
780
787
/* First alternative is an eye-dot-eye face such as ^.^ or >.<; the second eye is a character class, not an escaped literal '['. */
ASIANSMILEY = [ \^ x=~<>] \. [ \^ x=~<>] | [ \-\^ x=~<>'] _[ \-\^ x=~<>'] | \( [ \-\^ x=~<>'][ _.] ? [ \-\^ x=~<>'] \) | \( [ \^ x=~<>'] -[ \^ x=~<>'`] \) | ¯\\ _\( ツ\) _\/ ¯
781
788
782
-
783
789
/* U+2200-U+2BFF has a lot of the various mathematical, etc. symbol ranges */
784
790
/* \uFF65 is Halfwidth katakana middle dot; \u30FB is Katakana middle dot */
785
791
/* Math and other symbols that stand alone: °²× ∀; \u33A1 is m^2 in one char! */
786
792
/* Tibetan tsheg or tsek (U+0F0B) goes between syllables; words aren't space separated, so it may be a word or syllable marker; it indicates a possible line-break point. Treat as separate symbol. */
787
793
MISCSYMBOL = [ +%&~\^ |\\ ¦\u00A7 ¨\u00A9\u00AC\u00AE ¯\u00B0 - \u00B3\u00B4 - \u00BA\u00D7\u00F7\u0387\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0600 - \u0603\u0606 - \u060A\u060C\u0614\u061B\u061E\u066A\u066D\u0703 - \u070D\u07F6\u07F7\u07F8\u0964\u0965\u0E4F\u0F0B\u1FBD\u2016\u2017\u2020 - \u2025\u2030 - \u2038\u203B\u203C\u2043\u203E - \u2042\u2044\u2053\u207A - \u207F\u208A - \u208E\u2100 - \u214F\u2190 - \u21FF\u2200 - \u2BFF\u3001 - \u3006\u3008 - \u3020\u30FB\u33A1\uFF01 - \uFF0F\uFF1A - \uFF20\uFF3B - \uFF40\uFF5B - \uFF65\uFF65 ]
788
794
789
795
PROG_LANGS = c[ +][ +] |( c| f) #
796
+
797
+ ONECHAR_APOS = [ '\u0092\u2019 ´`\u0091\u2018\u201B ]
798
+ /* Assimilations5 leave 5 chars behind after division */
799
+ ASSIMILATIONS5 = {ONECHAR_APOS} tain{ONECHAR_APOS} t| t{ONECHAR_APOS} ain{ONECHAR_APOS} t
790
800
/* Assimilations3 leave 3 chars behind after division */
791
801
ASSIMILATIONS3 = cannot| 'twas| dunno| [ '’] d[ '’] ve
802
+ /* Assimilations2 leave 2 chars behind after division */
792
803
/* "nno" is a remnant after pushing back from dunno in ASSIMILATIONS3 */
793
804
/* Include splitting some apostrophe-less negations, but not ones like "wont" that are also words. */
794
805
ASSIMILATIONS2 = {APOS} tis| gonna| gotta| lemme| gimme| wanna| nno| aint| dont| doesnt| didnt| theyre
@@ -806,6 +817,14 @@ CP1252_MISC_SYMBOL = [\u0086\u0087\u0089\u0095\u0098\u0099]
806
817
if (DEBUG ) { logger. info(" Used {PROG_LANGS} to recognize " + tok); }
807
818
return getNext(tok, tok);
808
819
}
820
+ /* 'tain't and t'ain't are 7 characters long. When splitAssimilations is set, the last 5 ("ain't") are pushed back for rescanning, so yytext() below is only the leading 2 characters; otherwise the whole form is returned as one token. */
+ {ASSIMILATIONS5} { if (splitAssimilations) {
821
+ yypushback(5 );
822
+ }
823
+ String tok = yytext();
824
+ if (DEBUG ) { logger. info(" Used {ASSIMILATIONS5} to recognize " + tok +
825
+ " ; splitAssimilations=" + splitAssimilations); }
826
+ return getNext(tok, tok);
827
+ }
809
828
{ASSIMILATIONS3} { if (splitAssimilations) {
810
829
yypushback(3 );
811
830
}
@@ -860,6 +879,29 @@ CP1252_MISC_SYMBOL = [\u0086\u0087\u0089\u0095\u0098\u0099]
860
879
if (DEBUG ) { logger. info(" Used {SPPUNC} to recognize " + tok); }
861
880
return getNext(tok, tok);
862
881
}
882
+
883
+ /* Allow for two {REDAUX} like I'd've or they'd've */
884
+ {WORD} / {REDAUX}{REDAUX} { final String origTxt = yytext();
885
+ String tok = LexerUtils . removeSoftHyphens(origTxt);
886
+ if (americanize) {
887
+ tok = Americanize . americanize(tok);
888
+ }
889
+ if (DEBUG ) { logger. info(" Used {WORD} (4) to recognize " + origTxt + " as " + tok); }
890
+ return getNext(tok, origTxt);
891
+ }
892
+ /* Apostrophe-containing word followed (lookahead only, not consumed) by two reduced auxiliaries. Quotes are normalized and soft hyphens removed, as in the plain {APOWORD} rule; the debug label (3) distinguishes this rule from the single-{REDAUX} lookahead rule, which logs (2). */
+ {APOWORD} / {REDAUX}{REDAUX} { String tok = yytext();
893
+ String norm = LexerUtils . handleQuotes(tok, false , quoteStyle);
+ norm = LexerUtils . removeSoftHyphens(norm);
894
+ if (DEBUG ) { logger. info(" Used {APOWORD} (3) to recognize " + tok + " as " + norm +
895
+ " ; quoteStyle=" + quoteStyle + " ; probablyLeft=" + false ); }
896
+ return getNext(norm, tok);
897
+ }
898
+ /* Stem before a negative clitic that is itself followed by a reduced auxiliary (e.g. wouldn't've). Only the stem is emitted, with soft hyphens removed; both clitics are lookahead and remain in the input. */
+ {WORD_NOT} / {REDAUX_NOT}{REDAUX} { final String origTxt = yytext();
898
+ String tok = LexerUtils . removeSoftHyphens(origTxt);
899
+ if (DEBUG ) { logger. info(" Used {WORD_NOT} (2) to recognize " + origTxt + " as " + tok); }
900
+ return getNext(tok, origTxt);
901
+ }
903
+
904
+
863
905
{WORD} / {REDAUX} { final String origTxt = yytext();
864
906
String tok = LexerUtils . removeSoftHyphens(origTxt);
865
907
if (americanize) {
@@ -868,11 +910,21 @@ CP1252_MISC_SYMBOL = [\u0086\u0087\u0089\u0095\u0098\u0099]
868
910
if (DEBUG ) { logger. info(" Used {WORD} to recognize " + origTxt + " as " + tok); }
869
911
return getNext(tok, origTxt);
870
912
}
871
- {SWORD } / {SREDAUX} { final String origTxt = yytext();
913
+ {WORD_NOT } / {REDAUX_NOT} { final String origTxt = yytext();
872
914
String tok = LexerUtils . removeSoftHyphens(origTxt);
873
- if (DEBUG ) { logger. info(" Used {SWORD } to recognize " + origTxt + " as " + tok); }
915
+ if (DEBUG ) { logger. info(" Used {WORD_NOT } to recognize " + origTxt + " as " + tok); }
874
916
return getNext(tok, origTxt);
875
917
}
918
+ /* Apostrophe-containing word followed (lookahead only, not consumed) by one reduced auxiliary. Quotes are normalized and soft hyphens removed, as in the plain {APOWORD} rule. */
+ {APOWORD} / {REDAUX} { String tok = yytext();
919
+ String norm = LexerUtils . handleQuotes(tok, false , quoteStyle);
+ norm = LexerUtils . removeSoftHyphens(norm);
920
+ if (DEBUG ) { logger. info(" Used {APOWORD} (2) to recognize " + tok + " as " + norm +
921
+ " ; quoteStyle=" + quoteStyle + " ; probablyLeft=" + false ); }
922
+ return getNext(norm, tok);
923
+ }
924
+ /* Split a leading th' or y' off as its own token, but only when a {WORD} follows (lookahead, not consumed). The matched text is passed through unchanged as both arguments to getNext. */
+ {APOWORD2} / {WORD} { String txt = yytext();
925
+ if (DEBUG ) { logger. info(" Used {APOWORD2} to recognize " + txt); }
926
+ return getNext(txt, txt);
927
+ }
876
928
{DIGIT} +/ {SEP_SUFFIX} { String txt = yytext();
877
929
if (DEBUG ) { logger. info(" Used {DIGIT}/{SEP_SUFFIX} to recognize " + txt); }
878
930
return getNext(txt, txt);
@@ -897,14 +949,11 @@ CP1252_MISC_SYMBOL = [\u0086\u0087\u0089\u0095\u0098\u0099]
897
949
}
898
950
{APOWORD} { String tok = yytext();
899
951
String norm = LexerUtils . handleQuotes(tok, false , quoteStyle);
952
+ norm = LexerUtils . removeSoftHyphens(norm);
900
953
if (DEBUG ) { logger. info(" Used {APOWORD} to recognize " + tok + " as " + norm +
901
- " ; probablyLeft=" + false ); }
954
+ " ; quoteStyle= " + quoteStyle + " ; probablyLeft=" + false ); }
902
955
return getNext(norm, tok);
903
956
}
904
- {APOWORD2} /[:letter:] { String txt = yytext();
905
- if (DEBUG ) { logger. info(" Used {APOWORD2} to recognize " + txt); }
906
- return getNext(txt, txt);
907
- }
908
957
{FULLURL} { String txt = yytext();
909
958
String norm = txt;
910
959
if (escapeForwardSlashAsterisk) {
@@ -934,13 +983,13 @@ CP1252_MISC_SYMBOL = [\u0086\u0087\u0089\u0095\u0098\u0099]
934
983
{REDAUX} / [^\p{Latin} '’] { String tok = yytext();
935
984
String norm = LexerUtils . handleQuotes(tok, false , quoteStyle);
936
985
if (DEBUG ) { logger. info(" Used {REDAUX} to recognize " + tok + " as " + norm +
937
- " ; probablyLeft=" + false ); }
986
+ " ; quoteStyle= " + quoteStyle + " ; probablyLeft=" + false ); }
938
987
return getNext(norm, tok);
939
988
}
940
- {SREDAUX } / [^\p{Latin} '’] { String tok = yytext();
989
+ {REDAUX_NOT } / [^\p{Latin} '’] { String tok = yytext();
941
990
String norm = LexerUtils . handleQuotes(tok, false , quoteStyle);
942
- if (DEBUG ) { logger. info(" Used {SREDAUX } to recognize " + tok + " as " + norm +
943
- " ; probablyLeft =" + false ); }
991
+ if (DEBUG ) { logger. info(" Used {REDAUX_NOT } to recognize " + tok + " as " + norm +
992
+ " ; quoteStyle =" + quoteStyle ); }
944
993
return getNext(norm, tok);
945
994
}
946
995
{DATE} { String origTxt = yytext();
@@ -1175,7 +1224,7 @@ CP1252_MISC_SYMBOL = [\u0086\u0087\u0089\u0095\u0098\u0099]
1175
1224
}
1176
1225
{DBLQUOT} { String tok = yytext();
1177
1226
String norm = LexerUtils . handleQuotes(tok, false , quoteStyle);
1178
- if (DEBUG ) { logger. info(" Used {SREDAUX } to recognize " + tok + " as " + norm +
1227
+ if (DEBUG ) { logger. info(" Used {DBLQUOT } to recognize " + tok + " as " + norm +
1179
1228
" ; probablyLeft=" + false ); }
1180
1229
return getNext(norm, tok);
1181
1230
}
@@ -1185,6 +1234,18 @@ CP1252_MISC_SYMBOL = [\u0086\u0087\u0089\u0095\u0098\u0099]
1185
1234
if (DEBUG ) { logger. info(" Used {SMILEY} to recognize " + origText + " as " + txt); }
1186
1235
return getNext(txt, origText);
1187
1236
}
1237
+
1238
+ /* This rule doesn't seem to fire to block {CAP_NUM} when it could. I have no idea why. Ignoring for now as a rare case. */
1239
+ {ABBREV3} / {CAP_NUM_REST} {
1240
+ String txt = yytext();
1241
+ if (DEBUG ) { logger. info(" Used {ABBREV3} (2) to recognize " + txt); }
1242
+ return getNext(txt, txt);
1243
+ }
1244
+ {CAP_NUM} {
1245
+ String txt = yytext();
1246
+ if (DEBUG ) { logger. info(" Used {CAP_NUM} to recognize " + txt); }
1247
+ return getNext(txt, txt);
1248
+ }
1188
1249
{ASIANSMILEY} { String txt = yytext();
1189
1250
String origText = txt;
1190
1251
txt = LexerUtils . pennNormalizeParens(txt, normalizeParentheses);
@@ -1457,7 +1518,7 @@ CP1252_MISC_SYMBOL = [\u0086\u0087\u0089\u0095\u0098\u0099]
1457
1518
" ; probablyLeft=" + false ); }
1458
1519
return getNext(norm, tok);
1459
1520
}
1460
- /* This QUOTES must proceed (S) REDAUX (2) so it by preference matches straight quote before word.
1521
+ /* This QUOTES must precede REDAUX (2) so that it preferentially matches a straight quote before a word.
1461
1522
Trying to collapse the first two cases seemed to break things (?!?). */
1462
1523
{QUOTES} /[:letter:] {NOT_SPACENL_ONE_CHAR}
1463
1524
{ // Extra context is to not match on ones like 'd but you do want words like "a"
@@ -1485,17 +1546,12 @@ CP1252_MISC_SYMBOL = [\u0086\u0087\u0089\u0095\u0098\u0099]
1485
1546
" ; probablyLeft=" + false ); }
1486
1547
return getNext(norm, tok);
1487
1548
}
1488
- /* These (S) REDAUX (2) cases are needed in case string ends on "it's". See: testJacobEisensteinApostropheCase */
1549
+ /* This REDAUX (2) case is needed in case string ends on "it's". See: testJacobEisensteinApostropheCase */
1489
1550
{REDAUX} { String tok = yytext();
1490
1551
if (DEBUG ) { logger. info(" Used {REDAUX} (2) to recognize " + tok); }
1491
1552
return getNext(tok, tok);
1492
1553
}
1493
- {SREDAUX} { String tok = yytext();
1494
- String norm = LexerUtils . handleQuotes(tok, false , quoteStyle);
1495
- if (DEBUG ) { logger. info(" Used {SREDAUX} (2) to recognize " + tok + " as " + norm +
1496
- " ; probablyLeft=" + false ); }
1497
- return getNext(norm, tok);
1498
- }
1554
+ /* Plain {REDAUX_NOT} is captured by {APOWORD} */
1499
1555
1500
1556
{FAKEDUCKFEET} {
1501
1557
String tok = yytext();
0 commit comments