Skip to content

Commit c4567b7

Browse files
committed
proof of concept
1 parent 449b87d commit c4567b7

File tree

7 files changed

+58
-55
lines changed

7 files changed

+58
-55
lines changed

src/Text/Parsing/StringParser.purs

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,14 @@ import Data.Either (Either(..))
1515
-- | A position in an input string.
1616
type Pos = Int
1717

18-
-- | Strings are represented as a string with an index from the
18+
-- | Strings are represented as a substring with an index from the
1919
-- | start of the string.
2020
-- |
21-
-- | `{ str: s, pos: n }` is interpreted as the substring of `s`
22-
-- | starting at index n.
21+
-- | `{ str: s, pos: n }` is interpreted as the substring `s`
22+
-- | starting at index n of the original string.
2323
-- |
24-
-- | This allows us to avoid repeatedly finding substrings
25-
-- | every time we match a character.
26-
type PosString = { str :: String, pos :: Pos }
24+
-- | The pos is only kept for error messaging.
25+
type PosString = { substr :: String, posFromStart :: Pos }
2726

2827
-- | The type of parsing errors.
2928
type ParseError = { error :: String, pos :: Pos }
@@ -43,7 +42,7 @@ unParser (Parser p) = p
4342
-- | Run a parser for an input string. See also `printParserError`
4443
-- | and `unParser` for more flexible usages.
4544
runParser :: forall a. Parser a -> String -> Either ParseError a
46-
runParser (Parser p) s = map _.result (p { str: s, pos: 0 })
45+
runParser (Parser p) s = map _.result (p { substr: s, posFromStart: 0 })
4746

4847
-- | Prints a ParseError's the error message and the position of the error.
4948
printParserError :: ParseError -> String
@@ -65,7 +64,7 @@ instance altParser :: Alt Parser where
6564
alt (Parser p1) (Parser p2) = Parser \s ->
6665
case p1 s of
6766
Left { error, pos }
68-
| s.pos == pos -> p2 s
67+
| s.posFromStart == pos -> p2 s
6968
| otherwise -> Left { error, pos }
7069
right -> right
7170

@@ -92,17 +91,28 @@ instance monadRecParser :: MonadRec Parser where
9291
split { result: Done b, suffix } = Done { result: b, suffix }
9392

9493
instance lazyParser :: Lazy (Parser a) where
95-
defer f = Parser $ \str -> unParser (f unit) str
94+
defer f = Parser \str -> unParser (f unit) str
9695

9796
-- | Fail with the specified message.
9897
fail :: forall a. String -> Parser a
99-
fail error = Parser \{ pos } -> Left { pos, error }
98+
fail error = Parser \{ posFromStart } -> Left { pos: posFromStart, error }
10099

101100
-- | In case of error, the default behavior is to backtrack if no input was consumed.
102101
-- |
103102
-- | `try p` backtracks even if input was consumed.
104103
try :: forall a. Parser a -> Parser a
105-
try (Parser p) = Parser \(s@{ pos }) -> lmap (_ { pos = pos }) (p s)
104+
-- try (Parser p) = Parser \(s@{ pos }) -> lmap (_ { pos = pos }) (p s)
105+
try (Parser p) = Parser \s ->
106+
case p s of
107+
Left { error } -> Left { pos: s.posFromStart, error }
108+
right -> right
109+
110+
-- | Read ahead without consuming input.
111+
lookAhead :: forall a. Parser a -> Parser a
112+
lookAhead (Parser p) = Parser \s ->
113+
case p s of
114+
Right { result } -> Right { result, suffix: s }
115+
left -> left
106116

107117
instance semigroupParser :: Semigroup a => Semigroup (Parser a) where
108118
append = lift2 append

src/Text/Parsing/StringParser/CodePoints.purs

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import Data.Either (Either(..))
3131
import Data.Enum (fromEnum)
3232
import Data.Foldable (class Foldable, foldMap, elem, notElem)
3333
import Data.Maybe (Maybe(..))
34-
import Data.String.CodePoints (codePointAt, drop, indexOf', length)
34+
import Data.String.CodePoints (codePointAt, drop, indexOf, length)
3535
import Data.String.CodeUnits (singleton)
3636
import Data.String.Pattern (Pattern(..))
3737
import Data.String.Regex as Regex
@@ -43,17 +43,17 @@ import Text.Parsing.StringParser.Combinators (many, (<?>))
4343
eof :: Parser Unit
4444
eof = Parser \s ->
4545
case s of
46-
{ str, pos } | pos < length str -> Left { pos, error: "Expected EOF" }
46+
{ substr, posFromStart } | 0 < length substr -> Left { pos: posFromStart, error: "Expected EOF" }
4747
_ -> Right { result: unit, suffix: s }
4848

4949
-- | Match any character.
5050
anyChar :: Parser Char
51-
anyChar = Parser \{ str, pos } ->
52-
case codePointAt pos str of
51+
anyChar = Parser \{ substr, posFromStart } ->
52+
case codePointAt 0 substr of
5353
Just cp -> case toChar cp of
54-
Just chr -> Right { result: chr, suffix: { str, pos: pos + 1 } }
55-
Nothing -> Left { pos, error: "CodePoint " <> show cp <> " is not a character" }
56-
Nothing -> Left { pos, error: "Unexpected EOF" }
54+
Just chr -> Right { result: chr, suffix: { substr: drop 1 substr, posFromStart: posFromStart + 1 } }
55+
Nothing -> Left { pos: posFromStart, error: "CodePoint " <> show cp <> " is not a character" }
56+
Nothing -> Left { pos: posFromStart, error: "Unexpected EOF" }
5757
where
5858
toChar = fromCharCode <<< fromEnum
5959

@@ -68,8 +68,8 @@ anyDigit = try do
6868
string :: String -> Parser String
6969
string nt = Parser \s ->
7070
case s of
71-
{ str, pos } | indexOf' (Pattern nt) pos str == Just pos -> Right { result: nt, suffix: { str, pos: pos + length nt } }
72-
{ pos } -> Left { pos, error: "Expected '" <> nt <> "'." }
71+
{ substr, posFromStart } | indexOf (Pattern nt) substr == Just 0 -> Right { result: nt, suffix: { substr: drop (length nt) substr, posFromStart: posFromStart + length nt } }
72+
{ posFromStart } -> Left { pos: posFromStart, error: "Expected '" <> nt <> "'." }
7373

7474
-- | Match a character satisfying the given predicate.
7575
satisfy :: (Char -> Boolean) -> Parser Char
@@ -135,10 +135,9 @@ regex pat =
135135
pattern = "^(" <> pat <> ")"
136136

137137
matchRegex :: Regex.Regex -> Parser String
138-
matchRegex r = Parser \{ str, pos } -> do
139-
let remainder = drop pos str
140-
case NEA.head <$> Regex.match r remainder of
138+
matchRegex r = Parser \{ substr, posFromStart } -> do
139+
case NEA.head <$> Regex.match r substr of
141140
Just (Just matched) ->
142-
Right { result: matched, suffix: { str, pos: pos + length matched } }
141+
Right { result: matched, suffix: { substr: drop (length matched) substr, posFromStart: posFromStart + length matched } }
143142
_ ->
144-
Left { pos, error: "no match" }
143+
Left { pos: posFromStart, error: "no match" }

src/Text/Parsing/StringParser/CodeUnits.purs

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ import Text.Parsing.StringParser.Combinators (many, (<?>))
4242
eof :: Parser Unit
4343
eof = Parser \s ->
4444
case s of
45-
{ str, pos } | pos < SCU.length str -> Left { pos, error: "Expected EOF" }
45+
{ substr, posFromStart } | 0 < SCU.length substr -> Left { pos: posFromStart, error: "Expected EOF" }
4646
_ -> Right { result: unit, suffix: s }
4747

4848
-- | Match any character.
4949
anyChar :: Parser Char
50-
anyChar = Parser \{ str, pos } ->
51-
case charAt pos str of
52-
Just chr -> Right { result: chr, suffix: { str, pos: pos + 1 } }
53-
Nothing -> Left { pos, error: "Unexpected EOF" }
50+
anyChar = Parser \{ substr, posFromStart } ->
51+
case charAt 0 substr of
52+
Just chr -> Right { result: chr, suffix: { substr: SCU.drop 1 substr, posFromStart: posFromStart + 1 } }
53+
Nothing -> Left { pos: posFromStart, error: "Unexpected EOF" }
5454

5555
-- | Match any digit.
5656
anyDigit :: Parser Char
@@ -63,8 +63,8 @@ anyDigit = try do
6363
string :: String -> Parser String
6464
string nt = Parser \s ->
6565
case s of
66-
{ str, pos } | SCU.indexOf' (Pattern nt) pos str == Just pos -> Right { result: nt, suffix: { str, pos: pos + SCU.length nt } }
67-
{ pos } -> Left { pos, error: "Expected '" <> nt <> "'." }
66+
{ substr, posFromStart } | SCU.indexOf (Pattern nt) substr == Just 0 -> Right { result: nt, suffix: { substr: SCU.drop (SCU.length nt) substr, posFromStart: posFromStart + SCU.length nt } }
67+
{ posFromStart } -> Left { pos: posFromStart, error: "Expected '" <> nt <> "'." }
6868

6969
-- | Match a character satisfying the given predicate.
7070
satisfy :: (Char -> Boolean) -> Parser Char
@@ -130,10 +130,9 @@ regex pat =
130130
pattern = "^(" <> pat <> ")"
131131

132132
matchRegex :: Regex.Regex -> Parser String
133-
matchRegex r = Parser \{ str, pos } -> do
134-
let remainder = SCU.drop pos str
135-
case NEA.head <$> Regex.match r remainder of
133+
matchRegex r = Parser \{ substr, posFromStart } -> do
134+
case NEA.head <$> Regex.match r substr of
136135
Just (Just matched) ->
137-
Right { result: matched, suffix: { str, pos: pos + SCU.length matched } }
136+
Right { result: matched, suffix: { substr: SCU.drop (SCU.length matched) substr, posFromStart: posFromStart + SCU.length matched } }
138137
_ ->
139-
Left { pos, error: "no match" }
138+
Left { pos: posFromStart, error: "no match" }

src/Text/Parsing/StringParser/Combinators.purs

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
-- | This module defines combinators for building string parsers.
22
module Text.Parsing.StringParser.Combinators
3-
( lookAhead
4-
, many
3+
( many
54
, many1
65
, withError
76
, (<?>)
@@ -32,21 +31,13 @@ import Prelude
3231
import Control.Alt ((<|>))
3332
import Control.Lazy (fix)
3433
import Control.Monad.Rec.Class (Step(..), tailRecM)
35-
import Data.Either (Either(..))
3634
import Data.Foldable (class Foldable, foldl)
3735
import Data.List (List(..), manyRec)
3836
import Data.List.NonEmpty (NonEmptyList(..))
3937
import Data.List.NonEmpty as NEL
4038
import Data.Maybe (Maybe(..))
4139
import Data.NonEmpty ((:|))
42-
import Text.Parsing.StringParser (Parser(..), fail)
43-
44-
-- | Read ahead without consuming input.
45-
lookAhead :: forall a. Parser a -> Parser a
46-
lookAhead (Parser p) = Parser \s ->
47-
case p s of
48-
Right { result } -> Right { result, suffix: s }
49-
left -> left
40+
import Text.Parsing.StringParser (Parser, fail)
5041

5142
-- | Match zero or more times.
5243
many :: forall a. Parser a -> Parser (List a)

test/CodePoints.purs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import Data.Unfoldable (replicate)
1515
import Effect (Effect)
1616
import Effect.Class.Console (log)
1717
import Test.Assert (assert', assert)
18-
import Text.Parsing.StringParser (Parser, runParser, try)
18+
import Text.Parsing.StringParser (ParseError, Parser(..), PosString, runParser, try)
1919
import Text.Parsing.StringParser.CodePoints (anyDigit, char, eof, string, anyChar, regex)
2020
import Text.Parsing.StringParser.Combinators (many1, endBy1, sepBy1, optionMaybe, many, manyTill, many1Till, chainl, fix, between)
2121
import Text.Parsing.StringParser.Expr (Assoc(..), Operator(..), buildExprParser)
@@ -60,6 +60,9 @@ tryTest =
6060
try (string "aa" <> string "bb") <|>
6161
(string "aa" <> string "cc")
6262

63+
testParser :: forall a. Parser a -> String -> Either ParseError { result :: a, suffix :: PosString }
64+
testParser (Parser p) s = p { substr: s, posFromStart: 0 }
65+
6366
canParse :: forall a. Parser a -> String -> Boolean
6467
canParse p input = isRight $ runParser p input
6568

test/Examples.purs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ import Data.Foldable (fold, foldl, sum)
88
import Data.List.Types (NonEmptyList)
99
import Effect (Effect)
1010
import Effect.Console (log, logShow)
11-
import Text.Parsing.StringParser (Parser, fail, runParser, unParser)
11+
import Text.Parsing.StringParser (Parser, fail, lookAhead, runParser, unParser)
1212
import Text.Parsing.StringParser.CodePoints (anyChar, char, eof, regex, skipSpaces, string)
13-
import Text.Parsing.StringParser.Combinators (between, endBy1, lookAhead, many, many1, sepBy1, (<?>))
13+
import Text.Parsing.StringParser.Combinators (between, endBy1, many, many1, sepBy1, (<?>))
1414

1515
-- Serves only to make this file runnable
1616
main :: Effect Unit
@@ -234,7 +234,7 @@ doBoth parserName parser content = do
234234
doUnParser :: forall a. Show a => String -> Parser a -> String -> Effect Unit
235235
doUnParser parserName parser content = do
236236
log $ "(unParser) Parsing content with '" <> parserName <> "'"
237-
case unParser parser { str: content, pos: 0 } of
237+
case unParser parser { substr: content, posFromStart: 0 } of
238238
Left rec -> log $ "Position: " <> show rec.pos
239239
<>
240240
"\n\

test/Main.purs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ import Test.CodeUnits (testCodeUnits)
99

1010
main :: Effect Unit
1111
main = do
12-
log "Testing CodePoint parsing\n"
13-
testCodePoints
1412

15-
log "\n\nTesting CodeUnit parsing\n"
13+
log "Testing CodeUnit parsing\n"
1614
testCodeUnits
15+
16+
log "\n\nTesting CodePoint parsing\n"
17+
testCodePoints

0 commit comments

Comments
 (0)