Skip to content

More stack-safe combinators #131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ Notable changes to this project are documented in this file. The format is based

## [Unreleased]

New features:

- Added more stack-safe (at the expense of `MonadRec` constraint) combinators
`many1Rec`, `sepByRec`, `sepBy1Rec`, `endByRec`, `endBy1Rec`, `chainrRec`,
`chainr1Rec`, `chainlRec`, `chainl1Rec`, `skipManyRec`, and `skipMany1Rec`.
(#131 by @fsoikin)

## [v7.2.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v7.2.0) - 2022-01-07

New features:
Expand Down
144 changes: 124 additions & 20 deletions src/Text/Parsing/Parser/Combinators.purs
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@ import Control.Monad.State (StateT(..), runStateT)
import Control.Plus (empty, (<|>))
import Data.Either (Either(..))
import Data.Foldable (class Foldable, foldl)
import Data.List (List(..), many, reverse, (:))
import Data.List (List(..), many, manyRec, reverse, (:))
import Data.List.NonEmpty (NonEmptyList)
import Data.List.NonEmpty as NEL
import Data.Maybe (Maybe(..))
import Data.Newtype (unwrap)
import Data.Tuple (Tuple(..))
import Data.Tuple.Nested (type (/\), (/\))
import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), fail)

-- | Provide an error message in the case of failure.
Expand Down Expand Up @@ -112,6 +113,10 @@ lookAhead p = (ParserT <<< ExceptT <<< StateT) \s -> do
many1 :: forall m s a. Monad m => ParserT s m a -> ParserT s m (NonEmptyList a)
many1 p = NEL.cons' <$> p <*> many p

-- | Stack-safe version of `many1` at the expense of `MonadRec` constraint
many1Rec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (NonEmptyList a)
many1Rec p = NEL.cons' <$> p <*> manyRec p

-- | Parse phrases delimited by a separator.
-- |
-- | For example:
Expand All @@ -122,13 +127,24 @@ many1 p = NEL.cons' <$> p <*> many p
sepBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a)
sepBy p sep = map NEL.toList (sepBy1 p sep) <|> pure Nil

-- | Stack-safe version of `sepBy` at the expense of `MonadRec` constraint
sepByRec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a)
sepByRec p sep = map NEL.toList (sepBy1Rec p sep) <|> pure Nil

-- | Parse phrases delimited by a separator, requiring at least one match.
sepBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a)
sepBy1 p sep = do
a <- p
as <- many $ sep *> p
pure (NEL.cons' a as)

-- | Stack-safe version of `sepBy1` at the expense of `MonadRec` constraint
sepBy1Rec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a)
sepBy1Rec p sep = do
a <- p
as <- manyRec $ sep *> p
pure (NEL.cons' a as)

-- | Parse phrases delimited and optionally terminated by a separator.
sepEndBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a)
sepEndBy p sep = map NEL.toList (sepEndBy1 p sep) <|> pure Nil
Expand Down Expand Up @@ -168,51 +184,129 @@ sepEndBy1Rec p sep = do
endBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a)
endBy1 p sep = many1 $ p <* sep

-- | Stack-safe version of `endBy1` at the expense of `MonadRec` constraint
endBy1Rec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a)
endBy1Rec p sep = many1Rec $ p <* sep

-- | Parse phrases delimited and terminated by a separator.
endBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a)
endBy p sep = many $ p <* sep

-- | Stack-safe version of `endBy` at the expense of `MonadRec` constraint
endByRec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a)
endByRec p sep = manyRec $ p <* sep

-- | Parse phrases delimited by a right-associative operator.
-- |
-- | For example:
-- |
-- | ```purescript
-- | chainr digit (string "+" *> add) 0
-- | chainr digit (string "+" $> add) 0
-- | ```
chainr :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a
chainr p f a = chainr1 p f <|> pure a

-- | Stack-safe version of `chainr` at the expense of `MonadRec` constraint.
chainrRec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a
chainrRec p f a = chainr1Rec p f <|> pure a

-- | Parse phrases delimited by a left-associative operator.
-- |
-- | For example:
-- |
-- | ```purescript
-- | chainr digit (string "+" $> add) 0
-- | ```
chainl :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a
chainl p f a = chainl1 p f <|> pure a

-- | Stack-safe version of `chainl` at the expense of `MonadRec` constraint.
chainlRec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a
chainlRec p f a = chainl1Rec p f <|> pure a

-- | Parse phrases delimited by a left-associative operator, requiring at least one match.
chainl1 :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a
chainl1 p f = do
a <- p
chainl1' p f a

chainl1' :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a
chainl1' p f a =
( do
f' <- f
a' <- p
chainl1' p f (f' a a')
) <|> pure a
chainl1' a
where
chainl1' a =
( do
f' <- f
a' <- p
chainl1' (f' a a')
) <|> pure a
Comment on lines -195 to +238
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved this under where, because it seems like a private helper function. Technically it might be breaking though, if anybody happens to use this function for some reason. Should it be added to changelog? Or would you prefer it be brought back to the top level?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, technically it is breaking, isn't it.

I'll add this to the changelog and we'll bump major version to 8.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this private helper function has been exported since 2014, when this file was renamed from something else.

I agree that this is a private helper function and it should be private.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better this way, thanks.


-- | Stack-safe version of `chainl1` at the expense of `MonadRec` constraint.
chainl1Rec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a
chainl1Rec p f = do
a <- p
tailRecM go a
where
go :: a -> ParserT s m (Step a a)
go a =
( do
op <- f
a' <- p
pure $ Loop $ op a a'
)
<|> pure (Done a)

-- | Parse phrases delimited by a right-associative operator, requiring at least one match.
chainr1 :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a
chainr1 p f = do
a <- p
chainr1' p f a

chainr1' :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a
chainr1' p f a =
( do
f' <- f
a' <- chainr1 p f
pure $ f' a a'
) <|> pure a
chainr1' a
where
chainr1' a =
( do
f' <- f
a' <- chainr1 p f
pure $ f' a a'
) <|> pure a
Comment on lines -209 to +266
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah.


-- | Stack-safe version of `chainr1` at the expense of `MonadRec` constraint.
chainr1Rec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a
chainr1Rec p f = do
a <- p
tailRecM go { last: a, init: Nil }
where
-- This looks scary at first glance, so I'm leaving a comment in a vain
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I love this explanation, thanks very much for this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

-- attempt to explain how it works.
--
-- The loop state is a record {init, last}, where `last` is the last (i.e.
-- rightmost) `a` value that has been parsed so far, and `init` is a list of
-- (value + operator) pairs that have been parsed before that.
--
-- The very first value is parsed at top level, and it becomes the initial
-- value of `last`, while the initial value of `init` is just `Nil`,
-- indicating that no pairs of (value + operator) have been parsed yet.
--
-- At every step, we parse an operator and a value, and then the newly parsed
-- value becomes `last` (because, well, it's been parsed last), and the pair
-- of (previous last + operator) is prepended to `init`.
--
-- After we can no longer parse a pair of (value + operation), we're done. At
-- that point, we have a list of (value + operation) pairs in reverse order
-- (since we prepend each pair as we go) and the very last value. All that's
-- left is combine them all via `foldl`.
go
:: { init :: List (a /\ (a -> a -> a)), last :: a }
-> ParserT s m
( Step
{ init :: List (a /\ (a -> a -> a)), last :: a }
a
)
go { last, init } =
( do
op <- f
a <- p
pure $ Loop { last: a, init: (last /\ op) : init }
)
<|> pure (Done $ foldl apply last init)

apply :: a -> (a /\ (a -> a -> a)) -> a
apply y (x /\ op) = x `op` y

-- | Parse one of a set of alternatives.
choice :: forall f m s a. Foldable f => Monad m => f (ParserT s m a) -> ParserT s m a
Expand All @@ -222,13 +316,23 @@ choice = foldl (<|>) empty
skipMany :: forall s a m. Monad m => ParserT s m a -> ParserT s m Unit
skipMany p = skipMany1 p <|> pure unit

-- | Stack-safe version of `skipMany` at the expense of `MonadRec` constraint.
skipManyRec :: forall s a m. MonadRec m => ParserT s m a -> ParserT s m Unit
skipManyRec p = skipMany1Rec p <|> pure unit

-- | Skip at least one instance of a phrase.
skipMany1 :: forall s a m. Monad m => ParserT s m a -> ParserT s m Unit
skipMany1 p = do
_ <- p
_ <- skipMany p
pure unit

-- | Stack-safe version of `skipMany1` at the expense of `MonadRec` constraint.
skipMany1Rec :: forall s a m. MonadRec m => ParserT s m a -> ParserT s m Unit
skipMany1Rec p = p *> tailRecM go unit
where
go _ = (p $> Loop unit) <|> pure (Done unit)

-- | Fail if the specified parser matches.
notFollowedBy :: forall s a m. Monad m => ParserT s m a -> ParserT s m Unit
notFollowedBy p = try $ (try p *> fail "Negated parser succeeded") <|> pure unit
Expand Down
75 changes: 74 additions & 1 deletion test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import Effect (Effect)
import Effect.Console (logShow)
import Test.Assert (assert')
import Text.Parsing.Parser (ParseError(..), Parser, ParserT, parseErrorMessage, parseErrorPosition, region, runParser)
import Text.Parsing.Parser.Combinators (between, chainl, endBy1, many1TillRec, manyTillRec, optionMaybe, sepBy1, sepEndBy1Rec, sepEndByRec, try)
import Text.Parsing.Parser.Combinators (between, chainl, chainl1Rec, chainlRec, chainr1Rec, chainrRec, endBy1, endBy1Rec, endByRec, many1Rec, many1TillRec, manyTillRec, optionMaybe, sepBy1, sepBy1Rec, sepByRec, sepEndBy1Rec, sepEndByRec, skipMany1Rec, skipManyRec, try)
import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser)
import Text.Parsing.Parser.Language (haskellDef, haskellStyle, javaStyle)
import Text.Parsing.Parser.Pos (Position(..), initialPos)
Expand Down Expand Up @@ -127,6 +127,79 @@ stackSafeLoopsTest = do
"b,a,a"
(Position { line: 1, column: 1 })

-- 8 `div` (8 `div` 2) == 2
parseTest "8x8x2" 2 $
chainrRec digit (string "x" $> div) 42
parseTest "" 42 $
chainrRec digit (string "x" $> div) 42
parseTest "8x8x2" 2 $
chainr1Rec digit (string "x" $> div)
parseErrorTestPosition
(chainr1Rec digit (string "x" $> div))
""
(Position { line: 1, column: 1 })

-- (8 `div` 2) `div` 2 == 2
parseTest "8x2x2" 2 $
chainlRec digit (string "x" $> div) 42
parseTest "" 42 $
chainlRec digit (string "x" $> div) 42
parseTest "8x2x2" 2 $
chainl1Rec digit (string "x" $> div)
parseErrorTestPosition
(chainl1Rec digit (string "x" $> div))
""
(Position { line: 1, column: 1 })

parseTest "aaaabcd" "b" $
skipMany1Rec (string "a") *> string "b"
parseErrorTestPosition
(skipMany1Rec (string "a"))
"bcd"
(Position { line: 1, column: 1 })

parseTest "aaaabcd" "b" $
skipManyRec (string "a") *> string "b"
parseTest "bcd" "b" $
skipManyRec (string "a") *> string "b"

parseTest "aaa" (NE.cons' "a" $ toUnfoldable [ "a", "a" ]) $
many1Rec (string "a")
parseErrorTestPosition
(many1Rec (string "a"))
""
(Position { line: 1, column: 1 })

parseTest "a,a,ab" (toUnfoldable [ "a", "a", "a" ]) $
sepByRec (string "a") (string ",") <* string "b"
parseTest "b" Nil $
sepByRec (string "a") (string ",") <* string "b"
parseTest "a,a,ab" (NE.cons' "a" $ toUnfoldable [ "a", "a" ]) $
sepBy1Rec (string "a") (string ",") <* string "b"
parseErrorTestPosition
(sepBy1Rec (string "a") (string ","))
""
(Position { line: 1, column: 1 })
parseErrorTestPosition
(sepBy1Rec (string "a") (string ","))
"a,"
(Position { line: 1, column: 3 })

parseTest "a,a,a,b" (toUnfoldable [ "a", "a", "a" ]) $
endByRec (string "a") (string ",") <* string "b"
parseTest "b" Nil $
endByRec (string "a") (string ",") <* string "b"
parseTest "a,a,a,b" (NE.cons' "a" $ toUnfoldable [ "a", "a" ]) $
endBy1Rec (string "a") (string ",") <* string "b"
parseErrorTestPosition
(endBy1Rec (string "a") (string ","))
""
(Position { line: 1, column: 1 })
parseErrorTestPosition
(endBy1Rec (string "a") (string ","))
"a,a"
(Position { line: 1, column: 4 })

data TestToken = A | B

instance showTestTokens :: Show TestToken where
Expand Down