Skip to content

Some stack-safe sequence combinators #130

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Notable changes to this project are documented in this file. The format is based

New features:

- Added stack-safe (at the expense of `MonadRec` constraint) combinators
`manyTillRec`, `many1TillRec`, `sepEndByRec`, and `sepEndBy1Rec`. (#130 by @fsoikin)
- Added a new operator `<~?>` (alias of `withLazyErrorMessage`), an analog of
`<?>`, but allows the error message to be deferred until there is actually an
error. Handy when the error message is expensive to construct. (#129 by @fsoikin)
Expand Down
37 changes: 36 additions & 1 deletion src/Text/Parsing/Parser/Combinators.purs
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ import Prelude

import Control.Lazy (defer)
import Control.Monad.Except (ExceptT(..), runExceptT)
import Control.Monad.Rec.Class (class MonadRec, Step(..), tailRecM)
import Control.Monad.State (StateT(..), runStateT)
import Control.Plus (empty, (<|>))
import Data.Either (Either(..))
import Data.Foldable (class Foldable, foldl)
import Data.List (List(..), many, (:))
import Data.List (List(..), many, reverse, (:))
import Data.List.NonEmpty (NonEmptyList)
import Data.List.NonEmpty as NEL
import Data.Maybe (Maybe(..))
Expand Down Expand Up @@ -132,6 +133,10 @@ sepBy1 p sep = do
sepEndBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a)
sepEndBy p sep = map NEL.toList (sepEndBy1 p sep) <|> pure Nil

-- | Stack-safe version of `sepEndBy` at the expense of `MonadRec` constraint
sepEndByRec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a)
sepEndByRec p sep = map NEL.toList (sepEndBy1Rec p sep) <|> pure Nil

-- | Parse phrases delimited and optionally terminated by a separator, requiring at least one match.
sepEndBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a)
sepEndBy1 p sep = do
Expand All @@ -142,6 +147,23 @@ sepEndBy1 p sep = do
pure (NEL.cons' a as)
) <|> pure (NEL.singleton a)

-- | Stack-safe version of `sepEndBy1` at the expense of `MonadRec` constraint
sepEndBy1Rec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a)
sepEndBy1Rec p sep = do
a <- p
(NEL.cons' a <$> tailRecM go Nil) <|> pure (NEL.singleton a)
where
go :: List a -> ParserT s m (Step (List a) (List a))
go acc = nextOne <|> done
where
nextOne = do
-- First make sure there's a separator.
_ <- sep
-- Then try the phrase and loop if it's there, or bail if it's not there.
(p <#> \a -> Loop $ a : acc) <|> done

done = defer \_ -> pure $ Done $ reverse acc

-- | Parse phrases delimited and terminated by a separator, requiring at least one match.
endBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a)
endBy1 p sep = many1 $ p <* sep
Expand Down Expand Up @@ -220,9 +242,22 @@ manyTill p end = scan
xs <- scan
pure (x : xs)

-- | Stack-safe version of `manyTill` at the expense of `MonadRec` constraint
manyTillRec :: forall s a m e. MonadRec m => ParserT s m a -> ParserT s m e -> ParserT s m (List a)
manyTillRec p end = tailRecM go Nil
where
go :: List a -> ParserT s m (Step (List a) (List a))
go acc =
(end <#> \_ -> Done $ reverse acc)
<|> (p <#> \x -> Loop $ x : acc)

-- | Parse several phrases until the specified terminator matches, requiring at least one match.
many1Till :: forall s a m e. Monad m => ParserT s m a -> ParserT s m e -> ParserT s m (NonEmptyList a)
many1Till p end = do
x <- p
xs <- manyTill p end
pure (NEL.cons' x xs)

-- | Stack-safe version of `many1Till` at the expense of `MonadRec` constraint
many1TillRec :: forall s a m e. MonadRec m => ParserT s m a -> ParserT s m e -> ParserT s m (NonEmptyList a)
many1TillRec p end = NEL.cons' <$> p <*> manyTillRec p end
45 changes: 43 additions & 2 deletions test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import Prelude hiding (between, when)

import Control.Alt ((<|>))
import Control.Lazy (fix)
import Data.Array (some)
import Data.Array (some, toUnfoldable)
import Data.Array as Array
import Data.Either (Either(..))
import Data.List (List(..), fromFoldable, many)
import Data.List.NonEmpty (cons, cons')
import Data.List.NonEmpty as NE
import Data.Maybe (Maybe(..))
import Data.String.CodePoints as SCP
import Data.String.CodeUnits (fromCharArray, singleton)
Expand All @@ -18,7 +19,7 @@ import Effect (Effect)
import Effect.Console (logShow)
import Test.Assert (assert')
import Text.Parsing.Parser (ParseError(..), Parser, ParserT, parseErrorMessage, parseErrorPosition, region, runParser)
import Text.Parsing.Parser.Combinators (between, chainl, endBy1, optionMaybe, sepBy1, try)
import Text.Parsing.Parser.Combinators (between, chainl, endBy1, many1TillRec, manyTillRec, optionMaybe, sepBy1, sepEndBy1Rec, sepEndByRec, try)
import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser)
import Text.Parsing.Parser.Language (haskellDef, haskellStyle, javaStyle)
import Text.Parsing.Parser.Pos (Position(..), initialPos)
Expand Down Expand Up @@ -88,6 +89,44 @@ manySatisfyTest = do
_ <- char '?'
pure (fromCharArray r)

-- This test doesn't test the actual stack safety of these combinators, mainly
-- because I don't know how to come up with an example guaranteed to be large
-- enough to overflow the stack. But thankfully, their stack safety is more or
-- less guaranteed by construction.
--
-- Instead, this test checks functional correctness of the combinators, since
-- that's the more tricky part to get right (or to break later) in the absence
-- of clear explicit recursion.
stackSafeLoopsTest :: TestM
stackSafeLoopsTest = do
parseTest "aaabaa" (toUnfoldable [ "a", "a", "a" ]) $
manyTillRec (string "a") (string "b")
parseTest "baa" Nil $
manyTillRec (string "a") (string "b")

parseTest "aaabaa" (NE.cons' "a" $ toUnfoldable [ "a", "a" ]) $
many1TillRec (string "a") (string "b")
parseErrorTestPosition
(many1TillRec (string "a") (string "b"))
"baa"
(Position { line: 1, column: 1 })

parseTest "a,a,a,b,a,a" (toUnfoldable [ "a", "a", "a" ]) $
sepEndByRec (string "a") (string ",")
parseTest "a,a,abaa" (toUnfoldable [ "a", "a", "a" ]) $
sepEndByRec (string "a") (string ",")
parseTest "b,a,a" Nil $
sepEndByRec (string "a") (string ",")

parseTest "a,a,a,b,a,a" (NE.cons' "a" $ toUnfoldable [ "a", "a" ]) $
sepEndBy1Rec (string "a") (string ",")
parseTest "a,a,abaa" (NE.cons' "a" $ toUnfoldable [ "a", "a" ]) $
sepEndBy1Rec (string "a") (string ",")
parseErrorTestPosition
(sepEndBy1Rec (string "a") (string ","))
"b,a,a"
(Position { line: 1, column: 1 })

data TestToken = A | B

instance showTestTokens :: Show TestToken where
Expand Down Expand Up @@ -510,6 +549,8 @@ main = do
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 })
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { column: 10, line: 1 })

stackSafeLoopsTest

tokenParserIdentifierTest
tokenParserReservedTest
tokenParserOperatorTest
Expand Down