
Commit 9f67f3f

parser: limit maximum number of tokens
Replicates graphql/graphql-js@f0a0a4d
1 parent 52daf76

2 files changed (+61 -11 lines)

src/graphql/language/parser.py (+44 -11)

@@ -73,15 +73,22 @@
 def parse(
     source: SourceType,
     no_location: bool = False,
+    max_tokens: Optional[int] = None,
     allow_legacy_fragment_variables: bool = False,
 ) -> DocumentNode:
     """Given a GraphQL source, parse it into a Document.

     Throws GraphQLError if a syntax error is encountered.

     By default, the parser creates AST nodes that know the location in the source that
-    they correspond to. The ``no_location`` option disables that behavior for
-    performance or testing.
+    they correspond to. Setting the ``no_location`` parameter to True disables that
+    behavior for performance or testing.
+
+    Parser CPU and memory usage is linear in the number of tokens in a document;
+    however, in extreme cases it becomes quadratic due to memory exhaustion.
+    Parsing happens before validation, so even invalid queries can burn lots of
+    CPU time and memory. To prevent this, you can set a maximum number of tokens
+    allowed within a document using the ``max_tokens`` parameter.

     Legacy feature (will be removed in v3.3):

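
For context, a minimal sketch of how a caller might use the new parameter; the query string and the limit of 1000 are illustrative, not part of this commit:

    from graphql import parse
    from graphql.error import GraphQLSyntaxError

    # Reject pathologically large documents from untrusted clients before
    # the parser (and later validation) spends CPU and memory on them.
    try:
        document = parse("{ user { id name } }", max_tokens=1000)
    except GraphQLSyntaxError as error:
        print(f"Document rejected: {error.message}")
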
@@ -100,6 +107,7 @@ def parse(
     parser = Parser(
         source,
         no_location=no_location,
+        max_tokens=max_tokens,
         allow_legacy_fragment_variables=allow_legacy_fragment_variables,
     )
     return parser.parse_document()
@@ -108,6 +116,7 @@ def parse(
 def parse_value(
     source: SourceType,
     no_location: bool = False,
+    max_tokens: Optional[int] = None,
     allow_legacy_fragment_variables: bool = False,
 ) -> ValueNode:
     """Parse the AST for a given string containing a GraphQL value.
@@ -123,6 +132,7 @@ def parse_value(
     parser = Parser(
         source,
         no_location=no_location,
+        max_tokens=max_tokens,
         allow_legacy_fragment_variables=allow_legacy_fragment_variables,
     )
     parser.expect_token(TokenKind.SOF)
@@ -134,6 +144,7 @@ def parse_value(
 def parse_const_value(
     source: SourceType,
     no_location: bool = False,
+    max_tokens: Optional[int] = None,
     allow_legacy_fragment_variables: bool = False,
 ) -> ConstValueNode:
     """Parse the AST for a given string containing a GraphQL constant value.
@@ -144,6 +155,7 @@ def parse_const_value(
     parser = Parser(
         source,
         no_location=no_location,
+        max_tokens=max_tokens,
         allow_legacy_fragment_variables=allow_legacy_fragment_variables,
     )
     parser.expect_token(TokenKind.SOF)
@@ -155,6 +167,7 @@ def parse_const_value(
 def parse_type(
     source: SourceType,
     no_location: bool = False,
+    max_tokens: Optional[int] = None,
     allow_legacy_fragment_variables: bool = False,
 ) -> TypeNode:
     """Parse the AST for a given string containing a GraphQL Type.
@@ -170,6 +183,7 @@ def parse_type(
     parser = Parser(
         source,
         no_location=no_location,
+        max_tokens=max_tokens,
         allow_legacy_fragment_variables=allow_legacy_fragment_variables,
     )
     parser.expect_token(TokenKind.SOF)
@@ -191,13 +205,16 @@ class Parser:
     """

     _lexer: Lexer
-    _no_Location: bool
+    _no_location: bool
+    _max_tokens: Optional[int]
     _allow_legacy_fragment_variables: bool
+    _token_counter: int

     def __init__(
         self,
         source: SourceType,
         no_location: bool = False,
+        max_tokens: Optional[int] = None,
         allow_legacy_fragment_variables: bool = False,
     ):
         source = (
@@ -206,7 +223,9 @@ def __init__(

         self._lexer = Lexer(source)
         self._no_location = no_location
+        self._max_tokens = max_tokens
         self._allow_legacy_fragment_variables = allow_legacy_fragment_variables
+        self._token_counter = 0

     def parse_name(self) -> NameNode:
         """Convert a name lex token into a name parse node."""
@@ -477,7 +496,7 @@ def parse_value_literal(self, is_const: bool) -> ValueNode:

     def parse_string_literal(self, _is_const: bool = False) -> StringValueNode:
         token = self._lexer.token
-        self._lexer.advance()
+        self.advance_lexer()
         return StringValueNode(
             value=token.value,
             block=token.kind == TokenKind.BLOCK_STRING,
@@ -514,18 +533,18 @@ def parse_object(self, is_const: bool) -> ObjectValueNode:

     def parse_int(self, _is_const: bool = False) -> IntValueNode:
         token = self._lexer.token
-        self._lexer.advance()
+        self.advance_lexer()
         return IntValueNode(value=token.value, loc=self.loc(token))

     def parse_float(self, _is_const: bool = False) -> FloatValueNode:
         token = self._lexer.token
-        self._lexer.advance()
+        self.advance_lexer()
         return FloatValueNode(value=token.value, loc=self.loc(token))

     def parse_named_values(self, _is_const: bool = False) -> ValueNode:
         token = self._lexer.token
         value = token.value
-        self._lexer.advance()
+        self.advance_lexer()
         if value == "true":
             return BooleanValueNode(value=True, loc=self.loc(token))
         if value == "false":
@@ -1020,7 +1039,7 @@ def expect_token(self, kind: TokenKind) -> Token:
         """
         token = self._lexer.token
         if token.kind == kind:
-            self._lexer.advance()
+            self.advance_lexer()
             return token

         raise GraphQLSyntaxError(
@@ -1037,7 +1056,7 @@ def expect_optional_token(self, kind: TokenKind) -> bool:
         """
         token = self._lexer.token
         if token.kind == kind:
-            self._lexer.advance()
+            self.advance_lexer()
             return True

         return False
@@ -1050,7 +1069,7 @@ def expect_keyword(self, value: str) -> None:
         """
         token = self._lexer.token
         if token.kind == TokenKind.NAME and token.value == value:
-            self._lexer.advance()
+            self.advance_lexer()
         else:
             raise GraphQLSyntaxError(
                 self._lexer.source,
@@ -1066,7 +1085,7 @@ def expect_optional_keyword(self, value: str) -> bool:
         """
         token = self._lexer.token
         if token.kind == TokenKind.NAME and token.value == value:
-            self._lexer.advance()
+            self.advance_lexer()
             return True

         return False
@@ -1154,6 +1173,20 @@ def delimited_many(
                 break
         return nodes

+    def advance_lexer(self) -> None:
+        max_tokens = self._max_tokens
+        token = self._lexer.advance()
+
+        if max_tokens is not None and token.kind != TokenKind.EOF:
+            self._token_counter += 1
+            if self._token_counter > max_tokens:
+                raise GraphQLSyntaxError(
+                    self._lexer.source,
+                    token.start,
+                    f"Document contains more than {max_tokens} tokens."
+                    " Parsing aborted.",
+                )
+

 def get_token_desc(token: Token) -> str:
     """Describe a token as a string for debugging."""

tests/language/test_parser.py (+17)

@@ -91,6 +91,23 @@ def parse_provides_useful_error_when_using_source():
             """
         )

+    def limits_maximum_number_of_tokens():
+        assert parse("{ foo }", max_tokens=3)
+        with raises(
+            GraphQLSyntaxError,
+            match="Syntax Error: Document contains more than 2 tokens."
+            " Parsing aborted.",
+        ):
+            assert parse("{ foo }", max_tokens=2)
+
+        assert parse('{ foo(bar: "baz") }', max_tokens=8)
+        with raises(
+            GraphQLSyntaxError,
+            match="Syntax Error: Document contains more than 7 tokens."
+            " Parsing aborted.",
+        ):
+            assert parse('{ foo(bar: "baz") }', max_tokens=7)
+
     def parses_variable_inline_values():
         parse("{ field(complex: { a: { b: [ $var ] } }) }")
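
The thresholds in these tests follow from the token counts: ``{ foo }`` lexes to three non-EOF tokens ({, foo, }) and ``{ foo(bar: "baz") }`` to eight ({, foo, (, bar, :, "baz", ), }), so setting the limit one below each count triggers the error.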
