@@ -37,7 +37,6 @@ module Parsing.String
3737 , noneOfCodePoints
3838 , match
3939 , regex
40- , RegexFlagsRow
4140 , consumeWith
4241 ) where
4342
@@ -55,12 +54,10 @@ import Data.String (CodePoint, Pattern(..), codePointAt, length, null, singleton
5554import Data.String as String
5655import Data.String.CodeUnits as SCU
5756import Data.String.Regex as Regex
58- import Data.String.Regex.Flags (RegexFlags (..), RegexFlagsRec )
57+ import Data.String.Regex.Flags (RegexFlags )
5958import Data.Tuple (Tuple (..), fst )
6059import Partial.Unsafe (unsafePartial )
61- import Prim.Row (class Nub , class Union )
62- import Record (merge )
63- import Parsing (ParseError (..), ParseState (..), ParserT (..), fail )
60+ import Parsing (ParseError (..), ParseState (..), ParserT (..))
6461import Parsing.Combinators ((<?>), (<~?>))
6562import Parsing.Pos (Position (..))
6663
@@ -229,101 +226,71 @@ match p = do
229226 -- boundary.
230227 pure $ Tuple (SCU .take (SCU .length input1 - SCU .length input2) input1) x
231228
232- -- | Parser which uses the `Data.String.Regex` module to match the regular
233- -- | expression pattern passed as the `String`
234- -- | argument to the parser.
229+ -- | Compile a regular expression string into a regular expression parser.
230+ -- |
231+ -- | This function will use the `Data.String.Regex.regex` function to compile and return a parser which can be used
232+ -- | in a `ParserT String m` monad.
235233-- |
236234-- | This parser will try to match the regular expression pattern starting
237235-- | at the current parser position. On success, it will return the matched
238236-- | substring.
239237-- |
240- -- | If the `Regex` pattern string fails to compile then this parser will fail.
241- -- | (Note: It’s not possible to use a precompiled `Regex` because this parser
242- -- | must set flags and make adjustments to the `Regex` pattern string.)
238+ -- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
239+ -- |
240+ -- | This function should be called outside the context of a `ParserT String m` monad, because this function might
241+ -- | fail with a `Left` RegExp compilation error message.
242+ -- | If you call this function inside of the `ParserT String m` monad and then `fail` the parse when the compilation fails,
243+ -- | then that could be confusing because a parser failure is supposed to indicate an invalid input string.
244+ -- | If the compilation failure occurs in an `alt` then the compilation failure might not be reported at all and instead
245+ -- | the input string would be parsed incorrectly.
243246-- |
244247-- | This parser may be useful for quickly consuming a large section of the
245- -- | input `String`, because in a JavaScript runtime environment the ` RegExp`
248+ -- | input `String`, because in a JavaScript runtime environment the RegExp
246249-- | runtime is a lot faster than primitive parsers.
247250-- |
248- -- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
251+ -- | #### Example
252+ -- |
253+ -- | This example shows how to compile and run the `xMany` parser which will
254+ -- | capture the regular expression pattern `x*`.
255+ -- |
256+ -- | ```purescript
257+ -- | case regex "x*" noFlags of
258+ -- | Left compileError -> unsafeCrashWith $ "xMany failed to compile: " <> compileError
259+ -- | Right xMany -> runParser "xxxZ" do
260+ -- | xMany
261+ -- | ```
249262-- |
250263-- | #### Flags
251264-- |
252- -- | The `Record flags` argument to the parser is for `Regex` flags. Here are
253- -- | the default flags.
265+ -- | Set `RegexFlags` with the `Semigroup` instance like this.
254266-- |
255267-- | ```purescript
256- -- | { dotAll: true
257- -- | ignoreCase: false
258- -- | unicode: true
259- -- | }
268+ -- | regex "x*" (dotAll <> ignoreCase)
260269-- | ```
261270-- |
262- -- | To use the defaults, pass
263- -- | `{}` as the flags argument. For case-insensitive pattern matching, pass
264- -- | `{ignoreCase: true}` as the flags argument.
265- -- |
266- -- | The other `Data.String.Regex.Flags.RegexFlagsRec` fields are mostly
267- -- | nonsense in the context of parsing
268- -- | and use of the other flags may cause strange behavior in the parser.
271+ -- | The `dotAll`, `unicode`, and `ignoreCase` flags might make sense for a `regex` parser. The other flags will
272+ -- | probably cause surprising behavior and you should avoid them.
269273-- |
270274-- | [*MDN Advanced searching with flags*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#advanced_searching_with_flags)
275+ regex :: forall m . String -> RegexFlags -> Either String (ParserT String m String )
276+ regex pattern flags =
277+ Regex .regex (" ^(" <> pattern <> " )" ) flags <#> \regexobj ->
278+ consumeWith \input -> do
279+ case NonEmptyArray .head <$> Regex .match regexobj input of
280+ Just (Just consumed) -> do
281+ let remainder = SCU .drop (SCU .length consumed) input
282+ Right { value: consumed, consumed, remainder }
283+ _ ->
284+ Left " No Regex pattern match"
285+
286+ -- | Consume a portion of the input string while yielding a value.
271287-- |
272- -- | #### Example
288+ -- | Takes a consumption function which takes the remaining input `String`
289+ -- | as its argument and returns three fields:
273290-- |
274- -- | ```
275- -- | runParser "ababXX" (regex {} "(ab)+")
276- -- | ```
277- -- | ```
278- -- | (Right "abab")
279- -- | ```
280- regex
281- :: forall m flags f_
282- . Monad m
283- => Union flags RegexFlagsRow f_
284- => Nub f_ RegexFlagsRow
285- => Record flags
286- -> String
287- -> ParserT String m String
288- regex flags pattern =
289- -- Prefix a ^ to ensure the pattern only matches the current position in the parse
290- case Regex .regex (" ^(" <> pattern <> " )" ) flags' of
291- Left paterr ->
292- fail $ " Regex pattern error " <> paterr
293- Right regexobj ->
294- consumeWith \input -> do
295- case NonEmptyArray .head <$> Regex .match regexobj input of
296- Just (Just consumed) -> do
297- let remainder = SCU .drop (SCU .length consumed) input
298- Right { value: consumed, consumed, remainder }
299- _ ->
300- Left " No Regex pattern match"
301- where
302- flags' = RegexFlags
303- ( merge flags
304- { dotAll: true
305- , global: false
306- , ignoreCase: false
307- , multiline: false
308- , sticky: false
309- , unicode: true
310- } :: RegexFlagsRec
311- )
312-
313- -- | The fields from `Data.String.Regex.Flags.RegexFlagsRec`.
314- type RegexFlagsRow =
315- ( dotAll :: Boolean
316- , global :: Boolean
317- , ignoreCase :: Boolean
318- , multiline :: Boolean
319- , sticky :: Boolean
320- , unicode :: Boolean
321- )
322-
323- -- | Consumes a portion of the input string while yielding a value.
324291-- | * `value` is the value to return.
325- -- | * `consumed` is the input that was consumed and is used to update the parser position.
326- -- | * `remainder` is the new input state .
292+ -- | * `consumed` is the input `String` that was consumed. It is used to update the parser position.
293+ -- | * `remainder` is the new remaining input `String` .
327294consumeWith
328295 :: forall m a
329296 . (String -> Either String { value :: a , consumed :: String , remainder :: String } )
0 commit comments