@@ -36,19 +36,27 @@ module Text.Parsing.Parser.String
3636 , noneOf
3737 , noneOfCodePoints
3838 , match
39+ , regex
40+ , RegexFlagsRow
3941 ) where
4042
4143import Prelude hiding (between )
4244
4345import Control.Monad.State (get , put )
4446import Data.Array (notElem )
47+ import Data.Array.NonEmpty as NonEmptyArray
4548import Data.Char (fromCharCode )
4649import Data.CodePoint.Unicode (isSpace )
50+ import Data.Either (Either (..))
4751import Data.Foldable (elem )
4852import Data.Maybe (Maybe (..))
4953import Data.String (CodePoint , Pattern (..), length , null , singleton , splitAt , stripPrefix , uncons )
5054import Data.String.CodeUnits as SCU
55+ import Data.String.Regex as Regex
56+ import Data.String.Regex.Flags (RegexFlags (..), RegexFlagsRec )
5157import Data.Tuple (Tuple (..), fst )
58+ import Prim.Row (class Nub , class Union )
59+ import Record (merge )
5260import Text.Parsing.Parser (ParseState (..), ParserT , consume , fail )
5361import Text.Parsing.Parser.Combinators (skipMany , tryRethrow , (<?>), (<~?>))
5462import Text.Parsing.Parser.Pos (Position (..))
@@ -208,3 +216,82 @@ match p = do
208216-- | to something other than `newtype CodePoint = CodePoint Int`.
209217unCodePoint :: CodePoint -> Int
210218unCodePoint = unsafeCoerce
219+
220+ -- | Parser which uses the `Data.String.Regex` module to match the regular
221+ -- | expression pattern passed as the `String`
222+ -- | argument to the parser.
223+ -- |
224+ -- | This parser will try to match the regular expression pattern starting
225+ -- | at the current parser position. On success, it will return the matched
226+ -- | substring.
227+ -- |
228+ -- | This parser may be useful for quickly consuming a large section of the
229+ -- | input `String`, because in a JavaScript runtime environment the `RegExp`
230+ -- | runtime is a lot faster than primitive parsers.
231+ -- |
232+ -- | [*MDN Regular Expressions Cheatsheet*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet)
233+ -- |
234+ -- | The `Record flags` argument to the parser is for `Regex` flags. Here are
235+ -- | the default flags.
236+ -- |
237+ -- | ```purescript
238+ -- | { dotAll: true
239+ -- | ignoreCase: false
240+ -- | unicode: true
241+ -- | }
242+ -- | ```
243+ -- |
244+ -- | If you want to use the defaults then pass
245+ -- | `{}` as the flags argument. For case-insensitive pattern matching, pass
246+ -- | `{ignoreCase: true}` as the flags argument.
247+ -- | The other `Data.String.Regex.Flags.RegexFlagsRec` fields are mostly
248+ -- | nonsense in the context of parsing
249+ -- | and use of the other flags may cause strange behavior in the parser.
250+ -- |
251+ -- | [*MDN Advanced searching with flags*](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#advanced_searching_with_flags)
252+ -- |
253+ -- | If the `Regex` pattern string fails to compile then this parser will fail.
254+ -- | (Note: It’s not possible to use a precompiled `Regex` because this parser
255+ -- | must set flags and make adjustments to the `Regex` pattern string.)
256+ regex
257+ :: forall m flags f_
258+ . Monad m
259+ => Union flags RegexFlagsRow f_
260+ => Nub f_ RegexFlagsRow
261+ => Record flags
262+ -> String
263+ -> ParserT String m String
264+ regex flags pattern =
265+ -- Prefix a ^ to ensure the pattern only matches the current position in the parse
266+ case Regex .regex (" ^(" <> pattern <> " )" ) flags' of
267+ Left paterr ->
268+ fail $ " Regex pattern error " <> paterr
269+ Right regexobj -> do
270+ ParseState input position _ <- get
271+ case NonEmptyArray .head <$> Regex .match regexobj input of
272+ Just (Just matched) -> do
273+ let remainder = SCU .drop (SCU .length matched) input
274+ put $ ParseState remainder (updatePosString position matched) true
275+ pure matched
276+ _ -> fail $ " No Regex pattern match"
277+ where
278+ flags' = RegexFlags
279+ ( merge flags
280+ { dotAll: true
281+ , global: false
282+ , ignoreCase: false
283+ , multiline: false
284+ , sticky: false
285+ , unicode: true
286+ } :: RegexFlagsRec
287+ )
288+
289+ -- | The fields from `Data.String.Regex.Flags.RegexFlagsRec`.
290+ type RegexFlagsRow =
291+ ( dotAll :: Boolean
292+ , global :: Boolean
293+ , ignoreCase :: Boolean
294+ , multiline :: Boolean
295+ , sticky :: Boolean
296+ , unicode :: Boolean
297+ )
0 commit comments