@@ -37,7 +37,6 @@ module Parsing.String
37
37
, noneOfCodePoints
38
38
, match
39
39
, regex
40
- , RegexFlagsRow
41
40
, consumeWith
42
41
) where
43
42
@@ -55,12 +54,10 @@ import Data.String (CodePoint, Pattern(..), codePointAt, length, null, singleton
55
54
import Data.String as String
56
55
import Data.String.CodeUnits as SCU
57
56
import Data.String.Regex as Regex
58
- import Data.String.Regex.Flags (RegexFlags (..), RegexFlagsRec )
57
+ import Data.String.Regex.Flags (RegexFlags )
59
58
import Data.Tuple (Tuple (..), fst )
60
59
import Partial.Unsafe (unsafePartial )
61
- import Prim.Row (class Nub , class Union )
62
- import Record (merge )
63
- import Parsing (ParseError (..), ParseState (..), ParserT (..), fail )
60
+ import Parsing (ParseError (..), ParseState (..), ParserT (..))
64
61
import Parsing.Combinators ((<?>), (<~?>))
65
62
import Parsing.Pos (Position (..))
66
63
@@ -229,101 +226,71 @@ match p = do
229
226
-- boundary.
230
227
pure $ Tuple (SCU .take (SCU .length input1 - SCU .length input2) input1) x
231
228
232
- -- | Parser which uses the `Data.String.Regex` module to match the regular
233
- -- | expression pattern passed as the `String`
234
- -- | argument to the parser.
229
+ -- | Compile a regular expression string into a regular expression parser.
230
+ -- |
231
+ -- | This function will use the `Data.String.Regex.regex` function to compile and return a parser which can be used
232
+ -- | in a `ParserT String m` monad.
235
233
-- |
236
234
-- | This parser will try to match the regular expression pattern starting
237
235
-- | at the current parser position. On success, it will return the matched
238
236
-- | substring.
239
237
-- |
240
- -- | If the `Regex` pattern string fails to compile then this parser will fail.
241
- -- | (Note: It’s not possible to use a precompiled `Regex` because this parser
242
- -- | must set flags and make adjustments to the `Regex` pattern string.)
238
+ -- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
239
+ -- |
240
+ -- | This function should be called outside the context of a `ParserT String m` monad, because this function might
241
+ -- | fail with a `Left` RegExp compilation error message.
242
+ -- | If you call this function inside of the `ParserT String m` monad and then `fail` the parse when the compilation fails,
243
+ -- | then that could be confusing because a parser failure is supposed to indicate an invalid input string.
244
+ -- | If the compilation failure occurs in an `alt` then the compilation failure might not be reported at all and instead
245
+ -- | the input string would be parsed incorrectly.
243
246
-- |
244
247
-- | This parser may be useful for quickly consuming a large section of the
245
- -- | input `String`, because in a JavaScript runtime environment the ` RegExp`
248
+ -- | input `String`, because in a JavaScript runtime environment the RegExp
246
249
-- | runtime is a lot faster than primitive parsers.
247
250
-- |
248
- -- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
251
+ -- | #### Example
252
+ -- |
253
+ -- | This example shows how to compile and run the `xMany` parser which will
254
+ -- | capture the regular expression pattern `x*`.
255
+ -- |
256
+ -- | ```purescript
257
+ -- | case regex "x*" noFlags of
258
+ -- | Left compileError -> unsafeCrashWith $ "xMany failed to compile: " <> compileError
259
+ -- | Right xMany -> runParser "xxxZ" do
260
+ -- | xMany
261
+ -- | ```
249
262
-- |
250
263
-- | #### Flags
251
264
-- |
252
- -- | The `Record flags` argument to the parser is for `Regex` flags. Here are
253
- -- | the default flags.
265
+ -- | Set `RegexFlags` with the `Semigroup` instance like this.
254
266
-- |
255
267
-- | ```purescript
256
- -- | { dotAll: true
257
- -- | ignoreCase: false
258
- -- | unicode: true
259
- -- | }
268
+ -- | regex "x*" (dotAll <> ignoreCase)
260
269
-- | ```
261
270
-- |
262
- -- | To use the defaults, pass
263
- -- | `{}` as the flags argument. For case-insensitive pattern matching, pass
264
- -- | `{ignoreCase: true}` as the flags argument.
265
- -- |
266
- -- | The other `Data.String.Regex.Flags.RegexFlagsRec` fields are mostly
267
- -- | nonsense in the context of parsing
268
- -- | and use of the other flags may cause strange behavior in the parser.
271
+ -- | The `dotAll`, `unicode`, and `ignoreCase` flags might make sense for a `regex` parser. The other flags will
272
+ -- | probably cause surprising behavior and you should avoid them.
269
273
-- |
270
274
-- | [*MDN Advanced searching with flags*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#advanced_searching_with_flags)
275
+ regex :: forall m . String -> RegexFlags -> Either String (ParserT String m String )
276
+ regex pattern flags =
277
+ Regex .regex (" ^(" <> pattern <> " )" ) flags <#> \regexobj ->
278
+ consumeWith \input -> do
279
+ case NonEmptyArray .head <$> Regex .match regexobj input of
280
+ Just (Just consumed) -> do
281
+ let remainder = SCU .drop (SCU .length consumed) input
282
+ Right { value: consumed, consumed, remainder }
283
+ _ ->
284
+ Left " No Regex pattern match"
285
+
286
+ -- | Consume a portion of the input string while yielding a value.
271
287
-- |
272
- -- | #### Example
288
+ -- | Takes a consumption function which takes the remaining input `String`
289
+ -- | as its argument and returns three fields:
273
290
-- |
274
- -- | ```
275
- -- | runParser "ababXX" (regex {} "(ab)+")
276
- -- | ```
277
- -- | ```
278
- -- | (Right "abab")
279
- -- | ```
280
- regex
281
- :: forall m flags f_
282
- . Monad m
283
- => Union flags RegexFlagsRow f_
284
- => Nub f_ RegexFlagsRow
285
- => Record flags
286
- -> String
287
- -> ParserT String m String
288
- regex flags pattern =
289
- -- Prefix a ^ to ensure the pattern only matches the current position in the parse
290
- case Regex .regex (" ^(" <> pattern <> " )" ) flags' of
291
- Left paterr ->
292
- fail $ " Regex pattern error " <> paterr
293
- Right regexobj ->
294
- consumeWith \input -> do
295
- case NonEmptyArray .head <$> Regex .match regexobj input of
296
- Just (Just consumed) -> do
297
- let remainder = SCU .drop (SCU .length consumed) input
298
- Right { value: consumed, consumed, remainder }
299
- _ ->
300
- Left " No Regex pattern match"
301
- where
302
- flags' = RegexFlags
303
- ( merge flags
304
- { dotAll: true
305
- , global: false
306
- , ignoreCase: false
307
- , multiline: false
308
- , sticky: false
309
- , unicode: true
310
- } :: RegexFlagsRec
311
- )
312
-
313
- -- | The fields from `Data.String.Regex.Flags.RegexFlagsRec`.
314
- type RegexFlagsRow =
315
- ( dotAll :: Boolean
316
- , global :: Boolean
317
- , ignoreCase :: Boolean
318
- , multiline :: Boolean
319
- , sticky :: Boolean
320
- , unicode :: Boolean
321
- )
322
-
323
- -- | Consumes a portion of the input string while yielding a value.
324
291
-- | * `value` is the value to return.
325
- -- | * `consumed` is the input that was consumed and is used to update the parser position.
326
- -- | * `remainder` is the new input state .
292
+ -- | * `consumed` is the input `String` that was consumed. It is used to update the parser position.
293
+ -- | * `remainder` is the new remaining input `String` .
327
294
consumeWith
328
295
:: forall m a
329
296
. (String -> Either String { value :: a , consumed :: String , remainder :: String } )
0 commit comments