@@ -137,6 +137,14 @@ let ifLetAttr = (Location.mknoloc "ns.iflet", Parsetree.PStr [])
137
137
let suppressFragileMatchWarningAttr = (Location. mknoloc " warning" , Parsetree. PStr [Ast_helper.Str. eval (Ast_helper.Exp. constant (Pconst_string (" -4" , None )))])
138
138
let makeBracesAttr loc = (Location. mkloc " ns.braces" loc, Parsetree. PStr [] )
139
139
140
+ type stringLiteralState =
141
+ | Start
142
+ | Backslash
143
+ | HexEscape
144
+ | DecimalEscape
145
+ | OctalEscape
146
+ | EscapedLineBreak
147
+
140
148
type typDefOrExt =
141
149
| TypeDef of {recFlag : Asttypes .rec_flag ; types : Parsetree .type_declaration list }
142
150
| TypeExt of Parsetree .type_extension
@@ -488,149 +496,74 @@ let parseStringLiteral s =
488
496
let len = String. length s in
489
497
let b = Buffer. create (String. length s) in
490
498
491
- let rec loop i =
499
+ let rec parse state i d =
492
500
if i = len then
493
- ()
501
+ (match state with
502
+ | HexEscape | DecimalEscape | OctalEscape -> false
503
+ | _ -> true )
494
504
else
495
505
let c = String. unsafe_get s i in
496
- match c with
497
- | '\\' as c ->
498
- let nextIx = i + 1 in
499
- if nextIx < len then
500
- let nextChar = String. unsafe_get s nextIx in
501
- begin match nextChar with
502
- (* this is interesting :
503
- * let x = "foo\
504
- * bar"
505
- * The `\` escapes the newline, as if there was nothing here.
506
- * Essentialy transforming this piece of code in `let x = "foobar"`
507
- *
508
- * What is even more interesting is that any space or tabs after the
509
- * escaped newline are also dropped.
510
- * let x = "foo\
511
- * bar"
512
- * is the same as `let x = "foobar"`
513
- * )
514
- | '\010' | '\013' ->
515
- let i = ref (nextIx + 1 ) in
516
- while ! i < len && (
517
- let c = String. unsafe_get s ! i in
518
- c = ' ' || c = '\t'
519
- ) do
520
- incr i
521
- done ;
522
- loop ! i
523
- | 'n' ->
524
- Buffer. add_char b '\010' ;
525
- loop (nextIx + 1 )
526
- | 'r' ->
527
- Buffer. add_char b '\013' ;
528
- loop (nextIx + 1 )
529
- | 'b' ->
530
- Buffer. add_char b '\008' ;
531
- loop (nextIx + 1 )
532
- | 't' ->
533
- Buffer. add_char b '\009' ;
534
- loop (nextIx + 1 )
535
- | '\\' as c ->
536
- Buffer. add_char b c;
537
- loop (nextIx + 1 )
538
- | ' ' as c ->
539
- Buffer. add_char b c;
540
- loop (nextIx + 1 )
541
- | '\' ' as c ->
542
- Buffer. add_char b c;
543
- loop (nextIx + 1 )
544
- | '\"' as c ->
545
- Buffer. add_char b c;
546
- loop (nextIx + 1 )
547
- | '0' .. '9' ->
548
- if nextIx + 2 < len then
549
- let c0 = nextChar in
550
- let c1 = (String. unsafe_get s (nextIx + 1 )) in
551
- let c2 = (String. unsafe_get s (nextIx + 2 )) in
552
- let c =
553
- 100 * (Char. code c0 - 48 ) +
554
- 10 * (Char. code c1 - 48 ) +
555
- (Char. code c2 - 48 )
556
- in
557
- if (c < 0 || c > 255 ) then (
558
- Buffer. add_char b '\\' ;
559
- Buffer. add_char b c0;
560
- Buffer. add_char b c1;
561
- Buffer. add_char b c2;
562
- loop (nextIx + 3 )
563
- ) else (
564
- Buffer. add_char b (Char. unsafe_chr c);
565
- loop (nextIx + 3 )
566
- )
567
- else (
568
- Buffer. add_char b '\\' ;
569
- Buffer. add_char b nextChar;
570
- loop (nextIx + 1 )
571
- )
572
- | 'o' ->
573
- if nextIx + 3 < len then
574
- let c0 = (String. unsafe_get s (nextIx + 1 )) in
575
- let c1 = (String. unsafe_get s (nextIx + 2 )) in
576
- let c2 = (String. unsafe_get s (nextIx + 3 )) in
577
- let c =
578
- 64 * (Char. code c0 - 48 ) +
579
- 8 * (Char. code c1 - 48 ) +
580
- (Char. code c2 - 48 )
581
- in
582
- if (c < 0 || c > 255 ) then (
583
- Buffer. add_char b '\\' ;
584
- Buffer. add_char b '0' ;
585
- Buffer. add_char b c0;
586
- Buffer. add_char b c1;
587
- Buffer. add_char b c2;
588
- loop (nextIx + 4 )
589
- ) else (
590
- Buffer. add_char b (Char. unsafe_chr c);
591
- loop (nextIx + 4 )
592
- )
593
- else (
594
- Buffer. add_char b '\\' ;
595
- Buffer. add_char b nextChar;
596
- loop (nextIx + 1 )
597
- )
598
- | 'x' as c ->
599
- if nextIx + 2 < len then
600
- let c0 = (String. unsafe_get s (nextIx + 1 )) in
601
- let c1 = (String. unsafe_get s (nextIx + 2 )) in
602
- let c = (16 * (hexValue c0)) + (hexValue c1) in
603
- if (c < 0 || c > 255 ) then (
604
- Buffer. add_char b '\\' ;
605
- Buffer. add_char b 'x' ;
606
- Buffer. add_char b c0;
607
- Buffer. add_char b c1;
608
- loop (nextIx + 3 )
609
- ) else (
610
- Buffer. add_char b (Char. unsafe_chr c);
611
- loop (nextIx + 3 )
612
- )
613
- else (
614
- Buffer. add_char b '\\' ;
615
- Buffer. add_char b c;
616
- loop (nextIx + 2 )
617
- )
618
- | _ ->
619
- Buffer. add_char b c;
620
- Buffer. add_char b nextChar;
621
- loop (nextIx + 1 )
622
- end
623
- else (
624
- Buffer. add_char b c;
625
- ()
626
- )
627
- | c ->
628
- Buffer. add_char b c;
629
- loop (i + 1 )
506
+ match state with
507
+ | Start ->
508
+ (match c with
509
+ | '\\' -> parse Backslash (i + 1 ) d
510
+ | c -> Buffer. add_char b c; parse Start (i + 1 ) d)
511
+ | Backslash ->
512
+ (match c with
513
+ | 'n' -> Buffer. add_char b '\n' ; parse Start (i + 1 ) d
514
+ | 'r' -> Buffer. add_char b '\r' ; parse Start (i + 1 ) d
515
+ | 'b' -> Buffer. add_char b '\008' ; parse Start (i + 1 ) d
516
+ | 't' -> Buffer. add_char b '\009' ; parse Start (i + 1 ) d
517
+ | ('\\' | ' ' | '\' ' | '"' ) as c -> Buffer. add_char b c; parse Start (i + 1 ) d
518
+ | 'x' -> parse HexEscape (i + 1 ) 0
519
+ | 'o' -> parse OctalEscape (i + 1 ) 0
520
+ | '0' .. '9' -> parse DecimalEscape i 0
521
+ | '\010' | '\013' -> parse EscapedLineBreak (i + 1 ) d
522
+ | c -> Buffer. add_char b '\\' ; Buffer. add_char b c; parse Start (i + 1 ) d)
523
+ | HexEscape ->
524
+ if d == 1 then
525
+ let c0 = String. unsafe_get s (i - 1 ) in
526
+ let c1 = String. unsafe_get s i in
527
+ let c = (16 * (hexValue c0)) + (hexValue c1) in
528
+ if c < 0 || c > 255 then false
529
+ else (
530
+ Buffer. add_char b (Char. unsafe_chr c);
531
+ parse Start (i + 1 ) 0
532
+ )
533
+ else
534
+ parse HexEscape (i + 1 ) (d + 1 )
535
+ | DecimalEscape ->
536
+ if d == 2 then
537
+ let c0 = String. unsafe_get s (i - 2 ) in
538
+ let c1 = String. unsafe_get s (i - 1 ) in
539
+ let c2 = String. unsafe_get s i in
540
+ let c = 100 * (Char. code c0 - 48 ) + 10 * (Char. code c1 - 48 ) + (Char. code c2 - 48 ) in
541
+ if c < 0 || c > 255 then false
542
+ else (
543
+ Buffer. add_char b (Char. unsafe_chr c);
544
+ parse Start (i + 1 ) 0
545
+ )
546
+ else
547
+ parse DecimalEscape (i + 1 ) (d + 1 )
548
+ | OctalEscape ->
549
+ if d == 2 then
550
+ let c0 = String. unsafe_get s (i - 2 ) in
551
+ let c1 = String. unsafe_get s (i - 1 ) in
552
+ let c2 = String. unsafe_get s i in
553
+ let c = 64 * (Char. code c0 - 48 ) + 8 * (Char. code c1 - 48 ) + (Char. code c2 - 48 ) in
554
+ if c < 0 || c > 255 then false
555
+ else (
556
+ Buffer. add_char b (Char. unsafe_chr c);
557
+ parse Start (i + 1 ) 0
558
+ )
559
+ else
560
+ parse OctalEscape (i + 1 ) (d + 1 )
561
+ | EscapedLineBreak ->
562
+ (match c with
563
+ | ' ' | '\t' -> parse EscapedLineBreak (i + 1 ) d
564
+ | c -> Buffer. add_char b c; parse Start (i + 1 ) d)
630
565
in
631
- loop 0 ;
632
- Buffer. contents b
633
-
566
+ if parse Start 0 0 then Buffer. contents b else s
634
567
635
568
let rec parseLident p =
636
569
let recoverLident p =
@@ -696,8 +629,8 @@ let parseHashIdent ~startPos p =
696
629
Parser. expect Hash p;
697
630
match p.token with
698
631
| String text ->
699
- Parser. next p;
700
632
let text = if p.mode = ParseForTypeChecker then parseStringLiteral text else text in
633
+ Parser. next p;
701
634
(text, mkLoc startPos p.prevEndPos)
702
635
| Int {i; suffix} ->
703
636
let () = match suffix with
@@ -1207,8 +1140,8 @@ let rec parsePattern ?(alias=true) ?(or_=true) p =
1207
1140
) else (
1208
1141
let (ident, loc) = match p.token with
1209
1142
| String text ->
1210
- Parser. next p;
1211
1143
let text = if p.mode = ParseForTypeChecker then parseStringLiteral text else text in
1144
+ Parser. next p;
1212
1145
(text, mkLoc startPos p.prevEndPos)
1213
1146
| Int {i; suffix} ->
1214
1147
let () = match suffix with
0 commit comments