Skip to content

Commit a889c3c

Browse files
committed
Rework lambda expressions
Summary ------- `$.lambda_expression` body was changed from `$._block` to `$._indentable_expression`. This had the following effects: * 10x faster parser generation * parser size reduced from 41M to 24M * a new conflict with `$.self_type`, which was resolved by matching indent tokens in `$.template_body`. That change, in turn, required scanner.c to stop emitting INDENT and OUTDENT tokens when it encounters comments
1 parent d24edb6 commit a889c3c

File tree

4 files changed

+103
-34
lines changed

4 files changed

+103
-34
lines changed

corpus/definitions.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,8 @@ Value declarations (Scala 3 syntax)
840840
================================================================================
841841

842842
class A:
843+
// Comments that should not
844+
// influence indentation
843845
val b, c : Int
844846
val d : String
845847

@@ -849,6 +851,8 @@ class A:
849851
(class_definition
850852
(identifier)
851853
(template_body
854+
(comment)
855+
(comment)
852856
(val_declaration
853857
(identifier)
854858
(identifier)
@@ -1454,6 +1458,10 @@ trait A {
14541458
def f: Int
14551459
}
14561460

1461+
trait A { self =>
1462+
def f: Int
1463+
}
1464+
14571465
class B {
14581466
self: Something[A] =>
14591467

@@ -1463,6 +1471,14 @@ class B {
14631471
--------------------------------------------------------------------------------
14641472

14651473
(compilation_unit
1474+
(trait_definition
1475+
(identifier)
1476+
(template_body
1477+
(self_type
1478+
(identifier))
1479+
(function_declaration
1480+
(identifier)
1481+
(type_identifier))))
14661482
(trait_definition
14671483
(identifier)
14681484
(template_body

corpus/expressions.txt

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -185,10 +185,11 @@ class C:
185185
(indented_block
186186
(lambda_expression
187187
(identifier)
188-
(infix_expression
189-
(identifier)
190-
(operator_identifier)
191-
(integer_literal))))))
188+
(indented_block
189+
(infix_expression
190+
(identifier)
191+
(operator_identifier)
192+
(integer_literal)))))))
192193
(call_expression
193194
(identifier)
194195
(colon_argument
@@ -217,8 +218,8 @@ class C:
217218
(indented_cases
218219
(case_clause
219220
(identifier)
220-
(identifier)))))
221-
(comment)
221+
(identifier))
222+
(comment))))
222223
(ascription_expression
223224
(identifier)
224225
(type_identifier))
@@ -442,8 +443,8 @@ class C:
442443
(if_expression
443444
(boolean_literal)
444445
(indented_block
445-
(unit))
446-
(comment)
446+
(unit)
447+
(comment))
447448
(indented_block
448449
(unit))))))))
449450

@@ -1054,8 +1055,14 @@ object O {
10541055
val l = a => a + 1
10551056
val b = (x: Int, y: Int) => { x * y }
10561057
val f = _ => 2
1057-
(a, b, _) => a - b
1058-
foo { i => val x = 2 + i }
1058+
foo { i =>
1059+
val x = 2 + i
1060+
x
1061+
}
1062+
{ x =>
1063+
val y = 2 * x
1064+
y * y
1065+
}
10591066
}
10601067

10611068
--------------------------------------------------------------------------------
@@ -1092,29 +1099,33 @@ object O {
10921099
(lambda_expression
10931100
(wildcard)
10941101
(integer_literal)))
1095-
(lambda_expression
1096-
(bindings
1097-
(binding
1098-
(identifier))
1099-
(binding
1100-
(identifier))
1101-
(binding
1102-
(identifier)))
1103-
(infix_expression
1104-
(identifier)
1105-
(operator_identifier)
1106-
(identifier)))
11071102
(call_expression
11081103
(identifier)
11091104
(block
11101105
(lambda_expression
11111106
(identifier)
1107+
(indented_block
1108+
(val_definition
1109+
(identifier)
1110+
(infix_expression
1111+
(integer_literal)
1112+
(operator_identifier)
1113+
(identifier)))
1114+
(identifier)))))
1115+
(block
1116+
(lambda_expression
1117+
(identifier)
1118+
(indented_block
11121119
(val_definition
11131120
(identifier)
11141121
(infix_expression
11151122
(integer_literal)
11161123
(operator_identifier)
1117-
(identifier)))))))))
1124+
(identifier)))
1125+
(infix_expression
1126+
(identifier)
1127+
(operator_identifier)
1128+
(identifier))))))))
11181129

11191130
================================================================================
11201131
Unit expressions
@@ -1648,6 +1659,7 @@ throws()
16481659
using()
16491660

16501661
--------------------------------------------------------------------------------
1662+
16511663
(compilation_unit
16521664
(call_expression
16531665
(identifier)

grammar.js

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -369,17 +369,37 @@ module.exports = grammar({
369369
/*
370370
* TemplateBody ::= :<<< [SelfType] TemplateStat {semi TemplateStat} >>>
371371
*/
372-
template_body: $ =>
372+
template_body: $ => choice(
373+
prec.left(PREC.control, $._indented_template_body),
374+
prec.left(PREC.control, $._braced_template_body),
375+
),
376+
377+
_indented_template_body: $ => seq(
378+
':',
379+
$._indent,
380+
optional($.self_type),
381+
$._block,
382+
$._outdent,
383+
),
384+
385+
_braced_template_body: $ => seq(
386+
'{',
387+
optional(choice(
388+
$._braced_template_body1,
389+
$._braced_template_body2,
390+
)),
391+
'}',
392+
),
393+
394+
_braced_template_body1: $ => seq(optional($.self_type), $._block),
395+
_braced_template_body2: $ => seq(
373396
choice(
374-
prec.left(
375-
PREC.control,
376-
seq(":", $._indent, optional($.self_type), $._block, $._outdent),
377-
),
378-
prec.left(
379-
PREC.control,
380-
seq("{", optional($.self_type), optional($._block), "}"),
381-
),
397+
seq($._indent, optional($.self_type)),
398+
seq(optional($.self_type), $._indent),
382399
),
400+
optional($._block),
401+
$._outdent
402+
),
383403

384404
/*
385405
* WithTemplateBody ::= <<< [SelfType] TemplateStat {semi TemplateStat} >>>
@@ -1038,12 +1058,13 @@ module.exports = grammar({
10381058
$.call_expression,
10391059
),
10401060

1061+
10411062
lambda_expression: $ =>
10421063
prec.right(
10431064
seq(
10441065
field("parameters", choice($.bindings, $._identifier, $.wildcard)),
10451066
"=>",
1046-
$._block,
1067+
$._indentable_expression,
10471068
),
10481069
),
10491070

src/scanner.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,18 @@ static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_inte
8989
}
9090
}
9191

92+
// Reports whether the input at the lexer's current position begins a comment,
// i.e. a '/' immediately followed by '/' or '*'.
//
// mark_end() is called before any character is consumed; presumably (per the
// tree-sitter external-scanner API) this keeps the look-ahead below from
// extending the token being built -- confirm against the TSLexer docs.
// Note that when the first character is '/', it is consumed via advance()
// even if the function goes on to return false.
//
// Returns true for "//" or "/*", false otherwise. Both call sites in the
// scan function return false (emit no token) when this returns true.
static bool detect_comment_start(TSLexer *lexer) {
93+
lexer->mark_end(lexer);
94+
// Comments should not affect indentation
95+
if (lexer->lookahead == '/') {
96+
advance(lexer);
97+
if (lexer->lookahead == '/' || lexer -> lookahead == '*') {
98+
return true;
99+
}
100+
}
101+
return false;
102+
}
103+
92104
bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
93105
const bool *valid_symbols) {
94106
ScannerStack *stack = (ScannerStack *)payload;
@@ -103,7 +115,8 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
103115
(
104116
(prev != -1) &&
105117
lexer->lookahead == ')' ||
106-
lexer->lookahead == ']'
118+
lexer->lookahead == ']' ||
119+
lexer->lookahead == '}'
107120
) || (
108121
stack->last_indentation_size != -1 &&
109122
prev != -1 &&
@@ -131,6 +144,9 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
131144
newline_count > 0 &&
132145
(isEmptyStack(stack) ||
133146
indentation_size > peekStack(stack))) {
147+
if (detect_comment_start(lexer)) {
148+
return false;
149+
}
134150
pushStack(stack, indentation_size);
135151
lexer->result_symbol = INDENT;
136152
LOG(" INDENT\n");
@@ -148,6 +164,10 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
148164
LOG(" pop\n");
149165
LOG(" OUTDENT\n");
150166
lexer->result_symbol = OUTDENT;
167+
lexer->mark_end(lexer);
168+
if (detect_comment_start(lexer)) {
169+
return false;
170+
}
151171
stack->last_indentation_size = indentation_size;
152172
stack->last_newline_count = newline_count;
153173
if (lexer->eof(lexer)) {

0 commit comments

Comments
 (0)