@@ -15,22 +15,30 @@ module Parsing.String.Basic
1515 , alphaNum
1616 , intDecimal
1717 , number
18- , module Parsing.String
18+ , whiteSpace
19+ , skipSpaces
20+ , oneOf
21+ , oneOfCodePoints
22+ , noneOf
23+ , noneOfCodePoints
1924 ) where
2025
2126import Prelude
2227
28+ import Data.Array (elem , notElem )
2329import Data.CodePoint.Unicode (isAlpha , isAlphaNum , isDecDigit , isHexDigit , isLower , isOctDigit , isSpace , isUpper )
30+ import Data.Either (Either (..))
2431import Data.Int as Data.Int
2532import Data.Maybe (Maybe (..))
2633import Data.Number (infinity , nan )
2734import Data.Number as Data.Number
28- import Data.String (CodePoint )
35+ import Data.String (CodePoint , singleton , takeWhile )
2936import Data.String.CodePoints (codePointFromChar )
30- import Data.Tuple (Tuple (..))
37+ import Data.String.CodeUnits as SCU
38+ import Data.Tuple (Tuple (..), fst )
3139import Parsing (ParserT , fail )
32- import Parsing.Combinators (choice , skipMany , (<?>))
33- import Parsing.String (noneOf , noneOfCodePoints , oneOf , oneOfCodePoints , skipSpaces , whiteSpace )
40+ import Parsing.Combinators (choice , skipMany , (<?>), (<~?>) )
41+ import Parsing.String (consumeWith , match , satisfy , satisfyCodePoint )
3442import Parsing.String as Parser.String
3543
3644-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
@@ -94,8 +102,8 @@ number =
94102 , Parser.String .string " NaN" *> pure nan
95103 , do
96104 Tuple section _ <- Parser.String .match do
97- _ <- Parser.String . oneOf [ ' +' , ' -' , ' .' , ' 0' , ' 1' , ' 2' , ' 3' , ' 4' , ' 5' , ' 6' , ' 7' , ' 8' , ' 9' ]
98- skipMany $ Parser.String . oneOf [ ' e' , ' E' , ' +' , ' -' , ' .' , ' 0' , ' 1' , ' 2' , ' 3' , ' 4' , ' 5' , ' 6' , ' 7' , ' 8' , ' 9' ]
105+ _ <- oneOf [ ' +' , ' -' , ' .' , ' 0' , ' 1' , ' 2' , ' 3' , ' 4' , ' 5' , ' 6' , ' 7' , ' 8' , ' 9' ]
106+ skipMany $ oneOf [ ' e' , ' E' , ' +' , ' -' , ' .' , ' 0' , ' 1' , ' 2' , ' 3' , ' 4' , ' 5' , ' 6' , ' 7' , ' 8' , ' 9' ]
99107 -- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
100108 case Data.Number .fromString section of
101109 Nothing -> fail $ " Could not parse Number " <> section
@@ -113,12 +121,40 @@ number =
intDecimal :: forall m. ParserT String m Int
intDecimal = do
  -- Capture the text consumed by the inner parser: one leading sign or
  -- digit, followed by any number of further digits.
  Tuple section _ <- Parser.String.match do
    _ <- oneOf ([ '+', '-' ] <> digits)
    skipMany $ oneOf digits
  -- Conversion is delegated to `Data.Int.fromString`; when it rejects the
  -- captured text the parser fails and reports that text.
  case Data.Int.fromString section of
    Nothing -> fail $ " Could not parse Int " <> section
    Just x -> pure x
  where
  -- The ten decimal digit characters accepted after the optional sign.
  digits = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
121129
-- | Internal helper: lift a `CodePoint` predicate to a `Char` parser by
-- | converting each candidate `Char` with `codePointFromChar` before testing it.
satisfyCP :: forall m. (CodePoint -> Boolean) -> ParserT String m Char
satisfyCP predicate = Parser.String.satisfy \c -> predicate (codePointFromChar c)
133+
-- | Consume a (possibly empty) run of leading characters that satisfy
-- | `Data.CodePoint.Unicode.isSpace` and return the consumed text.
-- | Always succeeds.
whiteSpace :: forall m. ParserT String m String
whiteSpace = do
  -- `match` pairs the consumed text with the parser's result; only the
  -- text is of interest here.
  Tuple spaces _ <- match skipSpaces
  pure spaces
138+
-- | Discard any leading whitespace (characters satisfying `isSpace`).
-- | Always succeeds, consuming nothing when the input does not start
-- | with whitespace.
skipSpaces :: forall m. ParserT String m Unit
skipSpaces = consumeWith dropLeadingSpaces
  where
  dropLeadingSpaces input =
    let
      consumed = takeWhile isSpace input
      -- The prefix length is measured in code units so that the
      -- code-unit `drop` removes exactly the consumed prefix.
      remainder = SCU.drop (SCU.length consumed) input
    in
      Right { value: unit, consumed, remainder }
145+
-- | Parse a single BMP `Char` that is a member of the given array.
-- | The failure message lists the accepted characters.
oneOf :: forall m. Array Char -> ParserT String m Char
oneOf chars = satisfy isMember <~?> expectation
  where
  isMember c = elem c chars
  expectation _ = " one of " <> show chars
149+
-- | Parse a single BMP `Char` that is not a member of the given array.
-- | The failure message lists the rejected characters.
noneOf :: forall m. Array Char -> ParserT String m Char
noneOf chars = satisfy isAbsent <~?> expectation
  where
  isAbsent c = notElem c chars
  expectation _ = " none of " <> show chars
153+
-- | Parse a single Unicode `CodePoint` that is a member of the given array.
-- | The failure message lists the accepted code points, each rendered as a
-- | one-character string.
oneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
oneOfCodePoints codePoints = satisfyCodePoint isMember <~?> expectation
  where
  isMember cp = elem cp codePoints
  expectation _ = " one of " <> show (map singleton codePoints)
157+
-- | Parse a single Unicode `CodePoint` that is not a member of the given
-- | array. The failure message lists the rejected code points, each rendered
-- | as a one-character string.
noneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint
noneOfCodePoints codePoints = satisfyCodePoint isAbsent <~?> expectation
  where
  isAbsent cp = notElem cp codePoints
  expectation _ = " none of " <> show (map singleton codePoints)
0 commit comments