Skip to content

Commit 5119643

Browse files
committed
improve string parser performance
1 parent 72dc284 commit 5119643

File tree

1 file changed

+14
-12
lines changed

1 file changed

+14
-12
lines changed

src/Text/Parsing/StringParser/CodePoints.purs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,8 @@ import Data.Either (Either(..))
 import Data.Enum (fromEnum)
 import Data.Foldable (class Foldable, foldMap, elem, notElem)
 import Data.Maybe (Maybe(..))
-import Data.String.CodePoints (codePointAt, drop, indexOf, length)
-import Data.String.CodeUnits (singleton)
-import Data.String.Pattern (Pattern(..))
+import Data.String.CodePoints as SCP
+import Data.String.CodeUnits as SCU
 import Data.String.Regex as Regex
 import Data.String.Regex.Flags (noFlags)
 import Text.Parsing.StringParser (Parser(..), try, fail)
@@ -43,15 +42,15 @@ import Text.Parsing.StringParser.Combinators (many, (<?>))
 eof :: Parser Unit
 eof = Parser \s ->
   case s of
-    { substr, posFromStart } | 0 < length substr -> Left { pos: posFromStart, error: "Expected EOF" }
+    { substr, posFromStart } | 0 < SCU.length substr -> Left { pos: posFromStart, error: "Expected EOF" }
     _ -> Right { result: unit, suffix: s }
 
 -- | Match any character.
 anyChar :: Parser Char
 anyChar = Parser \{ substr, posFromStart } ->
-  case codePointAt 0 substr of
+  case SCP.codePointAt 0 substr of
     Just cp -> case toChar cp of
-      Just chr -> Right { result: chr, suffix: { substr: drop 1 substr, posFromStart: posFromStart + 1 } }
+      Just chr -> Right { result: chr, suffix: { substr: SCP.drop 1 substr, posFromStart: posFromStart + 1 } }
       Nothing -> Left { pos: posFromStart, error: "CodePoint " <> show cp <> " is not a character" }
     Nothing -> Left { pos: posFromStart, error: "Unexpected EOF" }
   where
@@ -66,10 +65,13 @@ anyDigit = try do
 
 -- | Match the specified string.
 string :: String -> Parser String
-string nt = Parser \s ->
-  case s of
-    { substr, posFromStart } | indexOf (Pattern nt) substr == Just 0 -> Right { result: nt, suffix: { substr: drop (length nt) substr, posFromStart: posFromStart + length nt } }
-    { posFromStart } -> Left { pos: posFromStart, error: "Expected '" <> nt <> "'." }
+string pattern = Parser \{ substr, posFromStart } ->
+  let
+    length = SCU.length pattern
+    { before, after } = SCU.splitAt length substr
+  in
+    if before == pattern then Right { result: pattern, suffix: { substr: after, posFromStart: posFromStart + length } }
+    else Left { pos: posFromStart, error: "Expected '" <> pattern <> "'." }
 
 -- | Match a character satisfying the given predicate.
 satisfy :: (Char -> Boolean) -> Parser Char
@@ -86,7 +88,7 @@ char c = satisfy (_ == c) <?> "Could not match character " <> show c
 whiteSpace :: Parser String
 whiteSpace = do
   cs <- many (satisfy \c -> c == '\n' || c == '\r' || c == ' ' || c == '\t')
-  pure (foldMap singleton cs)
+  pure (foldMap SCU.singleton cs)
 
 -- | Skip many whitespace characters.
 skipSpaces :: Parser Unit
@@ -138,6 +140,6 @@ regex pat =
   matchRegex r = Parser \{ substr, posFromStart } -> do
     case NEA.head <$> Regex.match r substr of
       Just (Just matched) ->
-        Right { result: matched, suffix: { substr: drop (length matched) substr, posFromStart: posFromStart + length matched } }
+        Right { result: matched, suffix: { substr: SCU.drop (SCU.length matched) substr, posFromStart: posFromStart + SCU.length matched } }
       _ ->
         Left { pos: posFromStart, error: "no match" }

0 commit comments

Comments
 (0)