@@ -893,11 +893,13 @@ pub(crate) enum Ignore {
893
893
/// ```eBNF
894
894
/// lang-string = *(token-list / delimited-attribute-list / comment)
895
895
///
896
- /// bareword = CHAR *(CHAR)
896
+ /// bareword = LEADINGCHAR *(CHAR)
897
+ /// bareword-without-leading-char = CHAR *(CHAR)
897
898
/// quoted-string = QUOTE *(NONQUOTE) QUOTE
898
899
/// token = bareword / quoted-string
900
+ /// token-without-leading-char = bareword-without-leading-char / quoted-string
899
901
/// sep = COMMA/WS *(COMMA/WS)
900
- /// attribute = (DOT token)/(token EQUAL token)
902
+ /// attribute = (DOT token)/(token EQUAL token-without-leading-char)
901
903
/// attribute-list = [sep] attribute *(sep attribute) [sep]
902
904
/// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
903
905
/// token-list = [sep] token *(sep token) [sep]
@@ -907,8 +909,15 @@ pub(crate) enum Ignore {
907
909
/// CLOSE_PARENT = ")"
908
910
/// OPEN-CURLY-BRACKET = "{"
909
911
/// CLOSE-CURLY-BRACKET = "}"
910
- /// CHAR = ALPHA / DIGIT / "_" / "-" / ":"
911
- /// QUOTE = %x22
912
+ /// LEADINGCHAR = ALPHA / DIGIT / "_" / "-" / ":"
913
+ /// ; All ASCII punctuation except comma, quote, apostrophe, equals, backslash, grave (backquote), parentheses, square brackets and braces.
914
+ /// ; Comma is used to separate language tokens, so it can't be used in one.
915
+ /// ; Quote is used to allow otherwise-disallowed characters in language tokens.
916
+ /// ; Equals is used to make key=value pairs in attribute blocks.
917
+ /// ; Backslash and grave are special Markdown characters.
918
+ /// ; Braces are used to start an attribute block.
919
+ /// CHAR = ALPHA / DIGIT / "_" / "-" / ":" / "." / "!" / "#" / "$" / "%" / "&" / "*" / "+" / "/" /
920
+ /// ";" / "<" / ">" / "?" / "@" / "^" / "|" / "~"
912
921
/// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
913
922
/// COMMA = ","
914
923
/// DOT = "."
@@ -932,9 +941,12 @@ pub(crate) enum LangStringToken<'a> {
932
941
KeyValueAttribute ( & ' a str , & ' a str ) ,
933
942
}
934
943
935
- fn is_bareword_char ( c : char ) -> bool {
944
+ fn is_leading_char ( c : char ) -> bool {
936
945
c == '_' || c == '-' || c == ':' || c. is_ascii_alphabetic ( ) || c. is_ascii_digit ( )
937
946
}
947
+ fn is_bareword_char ( c : char ) -> bool {
948
+ is_leading_char ( c) || ".!#$%&*+/;<>?@^|~" . contains ( c)
949
+ }
938
950
fn is_separator ( c : char ) -> bool {
939
951
c == ' ' || c == ',' || c == '\t'
940
952
}
@@ -1077,7 +1089,7 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
1077
1089
return self . next ( ) ;
1078
1090
} else if c == '.' {
1079
1091
return self . parse_class ( pos) ;
1080
- } else if c == '"' || is_bareword_char ( c) {
1092
+ } else if c == '"' || is_leading_char ( c) {
1081
1093
return self . parse_key_value ( c, pos) ;
1082
1094
} else {
1083
1095
self . emit_error ( format ! ( "unexpected character `{c}`" ) ) ;
@@ -1107,16 +1119,18 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
1107
1119
return None ;
1108
1120
}
1109
1121
let indices = self . parse_string ( pos) ?;
1110
- if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) && c != '{' && !is_separator ( c) && c != '(' {
1122
+ if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) &&
1123
+ c != '{' &&
1124
+ !is_separator ( c) &&
1125
+ c != '('
1126
+ {
1111
1127
self . emit_error ( format ! ( "expected ` `, `{{` or `,` after `\" `, found `{c}`" ) ) ;
1112
1128
return None ;
1113
1129
}
1114
1130
return Some ( LangStringToken :: LangToken ( & self . data [ indices. start ..indices. end ] ) ) ;
1115
1131
} else if c == '{' {
1116
1132
self . is_in_attribute_block = true ;
1117
1133
return self . next ( ) ;
1118
- } else if is_bareword_char ( c) {
1119
- continue ;
1120
1134
} else if is_separator ( c) {
1121
1135
if pos != start {
1122
1136
return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
@@ -1130,6 +1144,10 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
1130
1144
return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
1131
1145
}
1132
1146
return self . next ( ) ;
1147
+ } else if pos == start && is_leading_char ( c) {
1148
+ continue ;
1149
+ } else if pos != start && is_bareword_char ( c) {
1150
+ continue ;
1133
1151
} else {
1134
1152
self . emit_error ( format ! ( "unexpected character `{c}`" ) ) ;
1135
1153
return None ;
0 commit comments