Skip to content

Commit 0cfe422

Browse files
committed
New module Text.Parsing.Parser.String.Basic
New functions - `Parser.String.Basic.number` - `Parser.String.Basic.intDecimal` Moved the `Parser.Token` parsers `digit`, `hexDigit`, `octDigit`, `upper`, `space`, `letter`, `alphaNum` into the new module `Parser.String.Basic`.
1 parent ea13e18 commit 0cfe422

File tree

10 files changed

+186
-59
lines changed

10 files changed

+186
-59
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ New features:
1515
- `Parser.Combinators.many1Till_` (#143 by @jamesbrock)
1616
- `Parser.Combinators.manyTillRec_` (#143 by @jamesbrock)
1717
- `Parser.Combinators.many1TillRec_` (#143 by @jamesbrock)
18+
- `Parser.String.Basic.number` (#142 by @jamesbrock)
19+
- `Parser.String.Basic.intDecimal` (#142 by @jamesbrock)
1820

1921
Bugfixes:
2022

@@ -23,8 +25,11 @@ Bugfixes:
2325

2426
Other improvements:
2527

28+
- Moved the `Parser.Token` parsers `digit`, `hexDigit`, `octDigit`, `upper`,
29+
`space`, `letter`, `alphaNum` into the new module `Parser.String.Basic`. (#142 by @jamesdbrock)
2630
- Documentation. (#140 by @jamesdbrock)
2731
- Documentation. (#143 by @jamesdbrock)
32+
- Documentation. (#142 by @jamesdbrock)
2833

2934
## [v8.1.0](https://github.com/purescript-contrib/purescript-parsing/releases/tag/v8.1.0) - 2022-01-10
3035

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ and then the parser will succeed and return `Right true`.
7979

8080
### More parsers
8181

82-
There are other `String` parsers in the module `Text.Parsing.Parser.Token`, for example the parser `letter :: Parser String Char` which will accept any single alphabetic letter.
82+
There are other `String` parsers in the module `Text.Parsing.Parser.String.Basic`, for example the parser `letter :: Parser String Char` which will accept any single alphabetic letter.
8383

8484
### Parser combinators
8585

bench/Main.purs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ import Effect.Unsafe (unsafePerformEffect)
5555
import Performance.Minibench (benchWith)
5656
import Text.Parsing.Parser (Parser, runParser)
5757
import Text.Parsing.Parser.String (string)
58-
import Text.Parsing.Parser.Token (digit)
58+
import Text.Parsing.Parser.String.Basic (digit)
5959
import Text.Parsing.StringParser as StringParser
6060
import Text.Parsing.StringParser.CodePoints as StringParser.CodePoints
6161
import Text.Parsing.StringParser.CodeUnits as StringParser.CodeUnits

spago.dhall

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
, "math"
1313
, "maybe"
1414
, "newtype"
15+
, "numbers"
1516
, "prelude"
1617
, "strings"
1718
, "tailrec"

src/Text/Parsing/Parser/Expr.purs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
-- | This module is a port of the Haskell
2+
-- | [__Text.Parsec.Expr__](https://hackage.haskell.org/package/docs/Text-Parsec-Expr.html)
3+
-- | module.
4+
15
module Text.Parsing.Parser.Expr
26
( Assoc(..)
37
, Operator(..)

src/Text/Parsing/Parser/Indent.purs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
-- | This is purescript-port of Text.Parsing.Indent
2-
-- | <https://hackage.haskell.org/package/indents-0.3.3/docs/Text-Parsec-Indent.html>, 05.07.2016.
1+
-- | This module is a port of the Haskell
2+
-- | [__Text.Parsec.Indent__](https://hackage.haskell.org/package/indents-0.3.3/docs/Text-Parsec-Indent.html)
3+
-- | module from 2016-05-07.
4+
-- |
35
-- | A module to construct indentation aware parsers. Many programming
46
-- | language have indentation based syntax rules e.g. python and Haskell.
57
-- | This module exports combinators to create such parsers.

src/Text/Parsing/Parser/Language.purs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
-- | This module is a port of the Haskell
2+
-- | [__Text.Parsec.Language__](https://hackage.haskell.org/package/parsec/docs/Text-Parsec-Language.html)
3+
-- | module.
14
module Text.Parsing.Parser.Language
25
( haskellDef
36
, haskell
@@ -11,7 +14,8 @@ import Prelude
1114
import Control.Alt ((<|>))
1215
import Text.Parsing.Parser (ParserT)
1316
import Text.Parsing.Parser.String (char, oneOf)
14-
import Text.Parsing.Parser.Token (GenLanguageDef(..), LanguageDef, TokenParser, alphaNum, letter, makeTokenParser, unGenLanguageDef)
17+
import Text.Parsing.Parser.String.Basic (alphaNum, letter)
18+
import Text.Parsing.Parser.Token (GenLanguageDef(..), LanguageDef, TokenParser, makeTokenParser, unGenLanguageDef)
1519

1620
-----------------------------------------------------------
1721
-- Styles: haskellStyle, javaStyle
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
-- | Basic `String` parsers derived from primitive `String` parsers.
2+
-- |
3+
-- | Note: In the future, the
4+
-- | __noneOf__, __noneOfCodePoints__, __oneOf__, __oneOfCodePoints__, __skipSpaces__, __whiteSpace__
5+
-- | should be moved into this module and removed from the
6+
-- | __Text.Parsing.Parser.String__ module, because they are not primitive parsers.
7+
module Text.Parsing.Parser.String.Basic
8+
( digit
9+
, hexDigit
10+
, octDigit
11+
, letter
12+
, space
13+
, upper
14+
, alphaNum
15+
, intDecimal
16+
, number
17+
, module Text.Parsing.Parser.String
18+
) where
19+
20+
import Prelude
21+
22+
import Data.CodePoint.Unicode (isAlpha, isAlphaNum, isDecDigit, isHexDigit, isOctDigit, isSpace, isUpper)
23+
import Data.Int as Data.Int
24+
import Data.Maybe (Maybe(..))
25+
import Data.Number (infinity, nan)
26+
import Data.Number as Data.Number
27+
import Data.String (CodePoint)
28+
import Data.String.CodePoints (codePointFromChar)
29+
import Data.Tuple (Tuple(..))
30+
import Text.Parsing.Parser (ParserT, fail)
31+
import Text.Parsing.Parser.Combinators (choice, skipMany, (<?>))
32+
import Text.Parsing.Parser.String (noneOf, noneOfCodePoints, oneOf, oneOfCodePoints, skipSpaces, whiteSpace)
33+
import Text.Parsing.Parser.String as Parser.String
34+
35+
-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
36+
digit :: forall m. Monad m => ParserT String m Char
37+
digit = satisfyCP isDecDigit <?> "digit"
38+
39+
-- | Parse a hex digit. Matches any char that satisfies `Data.CodePoint.Unicode.isHexDigit`.
40+
hexDigit :: forall m. Monad m => ParserT String m Char
41+
hexDigit = satisfyCP isHexDigit <?> "hex digit"
42+
43+
-- | Parse an octal digit. Matches any char that satisfies `Data.CodePoint.Unicode.isOctDigit`.
44+
octDigit :: forall m. Monad m => ParserT String m Char
45+
octDigit = satisfyCP isOctDigit <?> "oct digit"
46+
47+
-- | Parse an uppercase letter. Matches any char that satisfies `Data.CodePoint.Unicode.isUpper`.
48+
upper :: forall m. Monad m => ParserT String m Char
49+
upper = satisfyCP isUpper <?> "uppercase letter"
50+
51+
-- | Parse a space character. Matches any char that satisfies `Data.CodePoint.Unicode.isSpace`.
52+
space :: forall m. Monad m => ParserT String m Char
53+
space = satisfyCP isSpace <?> "space"
54+
55+
-- | Parse an alphabetical character. Matches any char that satisfies `Data.CodePoint.Unicode.isAlpha`.
56+
letter :: forall m. Monad m => ParserT String m Char
57+
letter = satisfyCP isAlpha <?> "letter"
58+
59+
-- | Parse an alphabetical or numerical character.
60+
-- | Matches any char that satisfies `Data.CodePoint.Unicode.isAlphaNum`.
61+
alphaNum :: forall m. Monad m => ParserT String m Char
62+
alphaNum = satisfyCP isAlphaNum <?> "letter or digit"
63+
64+
-- | Parser based on the __Data.Number.fromString__ function.
65+
-- |
66+
-- | This should be the inverse of `show :: String -> Number`.
67+
-- |
68+
-- | Examples of strings which can be parsed by this parser:
69+
-- | * `"3"`
70+
-- | * `"3.0"`
71+
-- | * `"0.3"`
72+
-- | * `"-0.3"`
73+
-- | * `"+0.3"`
74+
-- | * `"-3e-1"`
75+
-- | * `"-3.0E-1.0"`
76+
-- | * `"NaN"`
77+
-- | * `"-Infinity"`
78+
number :: forall m. Monad m => ParserT String m Number
79+
-- TODO because the JavaScript parseFloat function will successfully parse
80+
-- a Number up until it doesn't understand something and then return
81+
-- the partially parsed Number, this parser will sometimes consume more
82+
-- String that it actually parses. Example "1..3" will parse as 1.0.
83+
-- So this needs improvement.
84+
number =
85+
choice
86+
[ Parser.String.string "Infinity" *> pure infinity
87+
, Parser.String.string "+Infinity" *> pure infinity
88+
, Parser.String.string "-Infinity" *> pure (negate infinity)
89+
, Parser.String.string "NaN" *> pure nan
90+
, do
91+
Tuple section _ <- Parser.String.match do
92+
_ <- Parser.String.oneOf [ '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
93+
skipMany $ Parser.String.oneOf [ 'e', 'E', '+', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
94+
-- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
95+
case Data.Number.fromString section of
96+
Nothing -> fail $ "Could not parse Number " <> section
97+
Just x -> pure x
98+
]
99+
100+
-- | Parser based on the __Data.Int.fromString__ function.
101+
-- |
102+
-- | This should be the inverse of `show :: String -> Int`.
103+
-- |
104+
-- | Examples of strings which can be parsed by this parser:
105+
-- | * `"3"`
106+
-- | * `"-3"`
107+
-- | * `"+300"`
108+
intDecimal :: forall m. Monad m => ParserT String m Int
109+
intDecimal = do
110+
Tuple section _ <- Parser.String.match do
111+
_ <- Parser.String.oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
112+
skipMany $ Parser.String.oneOf [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
113+
case Data.Int.fromString section of
114+
Nothing -> fail $ "Could not parse Int " <> section
115+
Just x -> pure x
116+
117+
-- | Helper function
118+
satisfyCP :: forall m. Monad m => (CodePoint -> Boolean) -> ParserT String m Char
119+
satisfyCP p = Parser.String.satisfy (p <<< codePointFromChar)

src/Text/Parsing/Parser/Token.purs

Lines changed: 17 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
-- | Functions for working with streams of tokens.
2+
-- |
3+
-- | This module is a port of the Haskell
4+
-- | [__Text.Parsec.Token__](https://hackage.haskell.org/package/docs/Text-Parsec-Token.html)
5+
-- | module.
26

37
module Text.Parsing.Parser.Token
48
( token
@@ -11,14 +15,7 @@ module Text.Parsing.Parser.Token
1115
, TokenParser
1216
, GenTokenParser
1317
, makeTokenParser
14-
-- should these be exported? Maybe they should go in a different module?
15-
, digit
16-
, hexDigit
17-
, octDigit
18-
, upper
19-
, space
20-
, letter
21-
, alphaNum
18+
, module Text.Parsing.Parser.String.Basic
2219
) where
2320

2421
import Prelude hiding (between, when)
@@ -28,7 +25,7 @@ import Control.Monad.State (get, gets, modify_)
2825
import Control.MonadPlus (guard, (<|>))
2926
import Data.Array as Array
3027
import Data.Char (fromCharCode, toCharCode)
31-
import Data.CodePoint.Unicode (hexDigitToInt, isAlpha, isAlphaNum, isDecDigit, isHexDigit, isOctDigit, isSpace, isUpper)
28+
import Data.CodePoint.Unicode (hexDigitToInt, isAlpha, isSpace)
3229
import Data.Either (Either(..))
3330
import Data.Foldable (foldl, foldr)
3431
import Data.Identity (Identity)
@@ -37,7 +34,7 @@ import Data.List (List(..))
3734
import Data.List as List
3835
import Data.List.NonEmpty (NonEmptyList)
3936
import Data.Maybe (Maybe(..), maybe)
40-
import Data.String (CodePoint, null, toLower)
37+
import Data.String (null, toLower)
4138
import Data.String.CodePoints (codePointFromChar)
4239
import Data.String.CodeUnits (singleton, toChar) as CodeUnits
4340
import Data.String.CodeUnits as SCU
@@ -48,6 +45,8 @@ import Text.Parsing.Parser (ParseState(..), ParserT, consume, fail)
4845
import Text.Parsing.Parser.Combinators (between, choice, notFollowedBy, option, sepBy, sepBy1, skipMany, skipMany1, try, tryRethrow, (<?>), (<??>))
4946
import Text.Parsing.Parser.Pos (Position)
5047
import Text.Parsing.Parser.String (char, noneOf, oneOf, satisfy, satisfyCodePoint, string)
48+
import Text.Parsing.Parser.String.Basic as Basic
49+
import Text.Parsing.Parser.String.Basic (digit, hexDigit, octDigit, upper, space, letter, alphaNum)
5150

5251
-- | A parser which returns the first token in the stream.
5352
token :: forall m a. Monad m => (a -> Position) -> ParserT (List a) m a
@@ -475,7 +474,7 @@ makeTokenParser (LanguageDef languageDef) =
475474
escapeEmpty = char '&'
476475

477476
escapeGap :: ParserT String m Char
478-
escapeGap = Array.some space *> char '\\' <?> "end of string gap"
477+
escapeGap = Array.some Basic.space *> char '\\' <?> "end of string gap"
479478

480479
-- -- escape codes
481480
escapeCode :: ParserT String m Char
@@ -485,16 +484,16 @@ makeTokenParser (LanguageDef languageDef) =
485484
charControl :: ParserT String m Char
486485
charControl = do
487486
_ <- char '^'
488-
code <- upper
487+
code <- Basic.upper
489488
case fromCharCode (toCharCode code - toCharCode 'A' + 1) of
490489
Just c -> pure c
491490
Nothing -> fail "invalid character code (should not happen)"
492491

493492
charNum :: ParserT String m Char
494493
charNum = do
495494
code <- decimal
496-
<|> (char 'o' *> number 8 octDigit)
497-
<|> (char 'x' *> number 16 hexDigit)
495+
<|> (char 'o' *> number 8 Basic.octDigit)
496+
<|> (char 'x' *> number 16 Basic.hexDigit)
498497
if code > 0x10FFFF then fail "invalid escape sequence"
499498
else case fromCharCode code of
500499
Just c -> pure c
@@ -646,7 +645,7 @@ makeTokenParser (LanguageDef languageDef) =
646645
fraction :: ParserT String m Number
647646
fraction = "fraction" <??> do
648647
_ <- char '.'
649-
digits <- Array.some digit <?> "fraction"
648+
digits <- Array.some Basic.digit <?> "fraction"
650649
maybe (fail "not digit") pure $ foldr op (Just 0.0) digits
651650
where
652651
op :: Char -> Maybe Number -> Maybe Number
@@ -688,13 +687,13 @@ makeTokenParser (LanguageDef languageDef) =
688687
(hexadecimal <|> octal <|> decimal <|> pure 0) <?> ""
689688

690689
decimal :: ParserT String m Int
691-
decimal = number 10 digit
690+
decimal = number 10 Basic.digit
692691

693692
hexadecimal :: ParserT String m Int
694-
hexadecimal = oneOf [ 'x', 'X' ] *> number 16 hexDigit
693+
hexadecimal = oneOf [ 'x', 'X' ] *> number 16 Basic.hexDigit
695694

696695
octal :: ParserT String m Int
697-
octal = oneOf [ 'o', 'O' ] *> number 8 octDigit
696+
octal = oneOf [ 'o', 'O' ] *> number 8 Basic.octDigit
698697

699698
number :: Int -> ParserT String m Char -> ParserT String m Int
700699
number base baseDigit = do
@@ -878,38 +877,3 @@ inCommentSingle (LanguageDef languageDef) =
878877
startEnd :: Array Char
879878
startEnd = SCU.toCharArray languageDef.commentEnd <> SCU.toCharArray languageDef.commentStart
880879

881-
-------------------------------------------------------------------------
882-
-- Helper functions that should maybe go in Text.Parsing.Parser.String --
883-
-------------------------------------------------------------------------
884-
885-
satisfyCP :: forall m. Monad m => (CodePoint -> Boolean) -> ParserT String m Char
886-
satisfyCP p = satisfy (p <<< codePointFromChar)
887-
888-
-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
889-
digit :: forall m. Monad m => ParserT String m Char
890-
digit = satisfyCP isDecDigit <?> "digit"
891-
892-
-- | Parse a hex digit. Matches any char that satisfies `Data.CodePoint.Unicode.isHexDigit`.
893-
hexDigit :: forall m. Monad m => ParserT String m Char
894-
hexDigit = satisfyCP isHexDigit <?> "hex digit"
895-
896-
-- | Parse an octal digit. Matches any char that satisfies `Data.CodePoint.Unicode.isOctDigit`.
897-
octDigit :: forall m. Monad m => ParserT String m Char
898-
octDigit = satisfyCP isOctDigit <?> "oct digit"
899-
900-
-- | Parse an uppercase letter. Matches any char that satisfies `Data.CodePoint.Unicode.isUpper`.
901-
upper :: forall m. Monad m => ParserT String m Char
902-
upper = satisfyCP isUpper <?> "uppercase letter"
903-
904-
-- | Parse a space character. Matches any char that satisfies `Data.CodePoint.Unicode.isSpace`.
905-
space :: forall m. Monad m => ParserT String m Char
906-
space = satisfyCP isSpace <?> "space"
907-
908-
-- | Parse an alphabetical character. Matches any char that satisfies `Data.CodePoint.Unicode.isAlpha`.
909-
letter :: forall m. Monad m => ParserT String m Char
910-
letter = satisfyCP isAlpha <?> "letter"
911-
912-
-- | Parse an alphabetical or numerical character.
913-
-- | Matches any char that satisfies `Data.CodePoint.Unicode.isAlphaNum`.
914-
alphaNum :: forall m. Monad m => ParserT String m Char
915-
alphaNum = satisfyCP isAlphaNum <?> "letter or digit"

test/Main.purs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import Data.List (List(..), fromFoldable, many)
1111
import Data.List.NonEmpty (cons, cons')
1212
import Data.List.NonEmpty as NE
1313
import Data.Maybe (Maybe(..), fromJust)
14+
import Data.Number (infinity, isNaN)
1415
import Data.String.CodePoints as SCP
1516
import Data.String.CodeUnits (fromCharArray, singleton)
1617
import Data.String.CodeUnits as SCU
@@ -25,7 +26,8 @@ import Text.Parsing.Parser.Expr (Assoc(..), Operator(..), buildExprParser)
2526
import Text.Parsing.Parser.Language (haskellDef, haskellStyle, javaStyle)
2627
import Text.Parsing.Parser.Pos (Position(..), initialPos)
2728
import Text.Parsing.Parser.String (anyChar, anyCodePoint, char, eof, noneOfCodePoints, oneOfCodePoints, rest, satisfy, string, takeN, whiteSpace)
28-
import Text.Parsing.Parser.Token (TokenParser, letter, makeTokenParser, match, token, when)
29+
import Text.Parsing.Parser.String.Basic (intDecimal, number, letter)
30+
import Text.Parsing.Parser.Token (TokenParser, makeTokenParser, match, token, when)
2931
import Text.Parsing.Parser.Token as Parser.Token
3032

3133
parens :: forall m a. Monad m => ParserT String m a -> ParserT String m a
@@ -655,6 +657,32 @@ main = do
655657
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 })
656658
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { column: 10, line: 1 })
657659

660+
parseTest "Infinity" infinity number
661+
parseTest "+Infinity" infinity number
662+
parseTest "-Infinity" (negate infinity) number
663+
parseErrorTestPosition number "+xxx" (mkPos 2)
664+
665+
parseTest "-3.0E-1.0" (-0.3) number
666+
667+
-- test from issue #73
668+
parseTest "0.7531531167929774" 0.7531531167929774 number
669+
670+
-- test from issue #115
671+
parseTest "-6.0" (-6.0) number
672+
parseTest "+6.0" (6.0) number
673+
674+
-- we can't test "NaN" with `parseTest` because nan doesn't compare equal
675+
case runParser "NaN" number of
676+
Right actual -> do
677+
assert' ("expected: NaN, actual: " <> show actual) (isNaN actual)
678+
logShow actual
679+
Left err -> assert' ("error: " <> show err) false
680+
681+
-- TODO This shows the current limitations of the number parser. Ideally this parse should fail.
682+
parseTest "1..3" 1.0 $ number <* eof
683+
684+
parseTest "-300" (-300) intDecimal
685+
658686
stackSafeLoopsTest
659687

660688
tokenParserIdentifierTest

0 commit comments

Comments
 (0)