Skip to content

Commit ad8d62c

Browse files
committed
Optimize performance of character class predicates
Note that because letters and digits each form contiguous ranges in ASCII, we can compare a character against the lower and upper bounds of a range, which is more efficient than checking membership in a set or string.
1 parent 9f35a7a commit ad8d62c

File tree

5 files changed

+149
-23
lines changed

5 files changed

+149
-23
lines changed

.coveragerc

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ source = src
44
omit =
55
*/conftest.py
66
*/test_*_fuzz.py
7+
*/assert_valid_name.py
78
*/cached_property.py
9+
*/character_classes.py
810
*/is_iterable.py
9-
*/assert_valid_name.py
1011

1112
[report]
1213
exclude_lines =

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ a query language for APIs created by Facebook.
1414

1515
The current version 3.1.6 of GraphQL-core is up-to-date with GraphQL.js version 15.5.1.
1616

17-
An extensive test suite with nearly 2300 unit tests and 100% coverage comprises a
17+
An extensive test suite with over 2300 unit tests and 100% coverage comprises a
1818
replication of the complete test suite of GraphQL.js, making sure this port is
1919
reliable and compatible with GraphQL.js.
2020

src/graphql/language/character_classes.py

+55-20
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,68 @@
11
__all__ = ["is_digit", "is_letter", "is_name_start", "is_name_continue"]
22

3+
try:
4+
"string".isascii()
5+
except AttributeError: # Python < 3.7
36

4-
def is_digit(char: str) -> bool:
5-
"""Check whether char is a digit
7+
def is_digit(char: str) -> bool:
8+
"""Check whether char is a digit
69
7-
For internal use by the lexer only.
8-
"""
9-
return "0" <= char <= "9"
10+
For internal use by the lexer only.
11+
"""
12+
return "0" <= char <= "9"
1013

14+
def is_letter(char: str) -> bool:
15+
"""Check whether char is a plain ASCII letter
1116
12-
def is_letter(char: str) -> bool:
13-
"""Check whether char is a plain ASCII letter
17+
For internal use by the lexer only.
18+
"""
19+
return "a" <= char <= "z" or "A" <= char <= "Z"
1420

15-
For internal use by the lexer only.
16-
"""
17-
return "A" <= char <= "Z" or "a" <= char <= "z"
21+
def is_name_start(char: str) -> bool:
22+
"""Check whether char is allowed at the beginning of a GraphQL name
1823
24+
For internal use by the lexer only.
25+
"""
26+
return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
1927

20-
def is_name_start(char: str) -> bool:
21-
"""Check whether char is allowed at the beginning of a GraphQL name
28+
def is_name_continue(char: str) -> bool:
29+
"""Check whether char is allowed in the continuation of a GraphQL name
2230
23-
For internal use by the lexer only.
24-
"""
25-
return is_letter(char) or char == "_"
31+
For internal use by the lexer only.
32+
"""
33+
return (
34+
"a" <= char <= "z"
35+
or "A" <= char <= "Z"
36+
or "0" <= char <= "9"
37+
or char == "_"
38+
)
2639

40+
else:
2741

28-
def is_name_continue(char: str) -> bool:
29-
"""Check whether char is allowed in the continuation of a GraphQL name
42+
def is_digit(char: str) -> bool:
43+
"""Check whether char is a digit
3044
31-
For internal use by the lexer only.
32-
"""
33-
return is_letter(char) or is_digit(char) or char == "_"
45+
For internal use by the lexer only.
46+
"""
47+
return char.isascii() and char.isdigit()
48+
49+
def is_letter(char: str) -> bool:
50+
"""Check whether char is a plain ASCII letter
51+
52+
For internal use by the lexer only.
53+
"""
54+
return char.isascii() and char.isalpha()
55+
56+
def is_name_start(char: str) -> bool:
57+
"""Check whether char is allowed at the beginning of a GraphQL name
58+
59+
For internal use by the lexer only.
60+
"""
61+
return char.isascii() and (char.isalpha() or char == "_")
62+
63+
def is_name_continue(char: str) -> bool:
64+
"""Check whether char is allowed in the continuation of a GraphQL name
65+
66+
For internal use by the lexer only.
67+
"""
68+
return char.isascii() and (char.isalnum() or char == "_")

src/graphql/language/lexer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,7 @@ def read_hex_digit(char: str) -> int:
524524
return ord(char) - 48
525525
elif "A" <= char <= "F":
526526
return ord(char) - 55
527-
elif "a" <= char <= "f": # a-f
527+
elif "a" <= char <= "f":
528528
return ord(char) - 87
529529
return -1
530530

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from string import ascii_letters as letters, digits, punctuation
2+
3+
from graphql.language.character_classes import (
4+
is_digit,
5+
is_letter,
6+
is_name_start,
7+
is_name_continue,
8+
)
9+
10+
non_ascii = "¯_±¹²³½£ºµÄäÖöØø×〇᧐〸αΑωΩ"
11+
12+
13+
def describe_digit():
14+
def accepts_digits():
15+
assert all(is_digit(char) for char in digits)
16+
17+
def rejects_letters():
18+
assert not any(is_digit(char) for char in letters)
19+
20+
def rejects_underscore():
21+
assert not is_digit("_")
22+
23+
def rejects_punctuation():
24+
assert not any(is_digit(char) for char in punctuation)
25+
26+
def rejects_non_ascii():
27+
assert not any(is_digit(char) for char in non_ascii)
28+
29+
def rejects_empty_string():
30+
assert not is_digit("")
31+
32+
33+
def describe_letter():
34+
def accepts_letters():
35+
assert all(is_letter(char) for char in letters)
36+
37+
def rejects_digits():
38+
assert not any(is_letter(char) for char in digits)
39+
40+
def rejects_underscore():
41+
assert not is_letter("_")
42+
43+
def rejects_punctuation():
44+
assert not any(is_letter(char) for char in punctuation)
45+
46+
def rejects_non_ascii():
47+
assert not any(is_letter(char) for char in non_ascii)
48+
49+
def rejects_empty_string():
50+
assert not is_letter("")
51+
52+
53+
def describe_name_start():
54+
def accepts_letters():
55+
assert all(is_name_start(char) for char in letters)
56+
57+
def accepts_underscore():
58+
assert is_name_start("_")
59+
60+
def rejects_digits():
61+
assert not any(is_name_start(char) for char in digits)
62+
63+
def rejects_punctuation():
64+
assert not any(is_name_start(char) for char in punctuation if char != "_")
65+
66+
def rejects_non_ascii():
67+
assert not any(is_name_start(char) for char in non_ascii)
68+
69+
def rejects_empty_string():
70+
assert not is_name_start("")
71+
72+
73+
def describe_name_continue():
74+
def accepts_letters():
75+
assert all(is_name_continue(char) for char in letters)
76+
77+
def accepts_digits():
78+
assert all(is_name_continue(char) for char in digits)
79+
80+
def accepts_underscore():
81+
assert is_name_continue("_")
82+
83+
def rejects_punctuation():
84+
assert not any(is_name_continue(char) for char in punctuation if char != "_")
85+
86+
def rejects_non_ascii():
87+
assert not any(is_name_continue(char) for char in non_ascii)
88+
89+
def rejects_empty_string():
90+
assert not is_name_continue("")

0 commit comments

Comments
 (0)