Skip to content

Commit 74f5667

Browse files
[3.14] gh-134675: Add t-string prefixes to tokenizer module, lexical analysis doc, and add a test to make sure we catch this error in the future. (GH-134734) (#134739)
gh-134675: Add t-string prefixes to tokenizer module, lexical analysis doc, and add a test to make sure we catch this error in the future. (GH-134734)

* Add t-string prefixes to _all_string_prefixes, and add a test to make sure we catch this error in the future.
* Update lexical analysis docs for t-string prefixes.

(cherry picked from commit 08c78e0)

Co-authored-by: Eric V. Smith <[email protected]>
1 parent c6cc282 commit 74f5667

File tree

3 files changed

+59
-2
lines changed

3 files changed

+59
-2
lines changed

Doc/reference/lexical_analysis.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,8 +489,9 @@ String literals are described by the following lexical definitions:

 .. productionlist:: python-grammar
    stringliteral: [`stringprefix`](`shortstring` | `longstring`)
-   stringprefix: "r" | "u" | "R" | "U" | "f" | "F"
+   stringprefix: "r" | "u" | "R" | "U" | "f" | "F" | "t" | "T"
              : | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF"
+             : | "tr" | "Tr" | "tR" | "TR" | "rt" | "rT" | "Rt" | "RT"
    shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
    longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
    shortstringitem: `shortstringchar` | `stringescapeseq`

Lib/test/test_tokenize.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
 import contextlib
+import itertools
 import os
 import re
+import string
 import tempfile
 import token
 import tokenize
@@ -3238,5 +3240,59 @@ def test_exact_flag(self):
         self.check_output(source, expect, flag)

class StringPrefixTest(unittest.TestCase):
    """Check that tokenize.StringPrefix agrees with what the compiler accepts.

    tokenize.StringPrefix is the regex alternation of every legal string
    prefix; this test cross-validates it against the interpreter itself by
    eval-ing candidate prefixes and comparing the two sets.
    """

    def test_prefixes(self):
        # Get the list of defined string prefixes.  I don't see an
        # obvious documented way of doing this, but probably the best
        # thing is to split apart tokenize.StringPrefix.

        # Make sure StringPrefix begins and ends in parens, so the
        # slice-and-split below is actually taking it apart correctly.
        self.assertEqual(tokenize.StringPrefix[0], '(')
        self.assertEqual(tokenize.StringPrefix[-1], ')')

        # Then split apart everything else by '|'.
        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))

        # Now compute the actual string prefixes, by eval-ing every
        # candidate prefix followed by an empty string literal.

        # Try all prefix lengths until we find a length that has zero
        # valid prefixes.  This will miss the case where for example
        # there are no valid 3 character prefixes, but there are valid
        # 4 character prefixes.  That seems extremely unlikely.

        # Note that the empty prefix is being included, because length
        # starts at 0.  That's expected, since StringPrefix includes
        # the empty prefix.

        valid_prefixes = set()
        for length in itertools.count():
            num_at_this_length = 0
            # combinations() yields each letter set once; permutations()
            # then covers every ordering of that set, and product() every
            # upper/lower casing of that ordering.  (No list() needed —
            # the combinations iterator is consumed directly.)
            for letters in itertools.combinations(string.ascii_lowercase, length):
                for ordering in itertools.permutations(letters):
                    for cased in itertools.product(*[(c, c.upper()) for c in ordering]):
                        p = ''.join(cased)
                        if p == "not":
                            # 'not' can never be a string prefix,
                            # because it's a valid expression: not ""
                            continue
                        try:
                            eval(f'{p}""')

                            # No syntax error, so p is a valid string
                            # prefix.
                            valid_prefixes.add(p)
                            num_at_this_length += 1
                        except SyntaxError:
                            pass
            if num_at_this_length == 0:
                break

        self.assertEqual(defined_prefixes, valid_prefixes)
 if __name__ == "__main__":
     unittest.main()

Lib/tokenize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def _all_string_prefixes():
     # The valid string prefixes. Only contain the lower case versions,
     # and don't contain any permutations (include 'fr', but not
     # 'rf'). The various permutations will be generated.
-    _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
+    _valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr']
     # if we add binary f-strings, add: ['fb', 'fbr']
     result = {''}
     for prefix in _valid_string_prefixes:

0 commit comments

Comments
 (0)