@@ -317,54 +317,41 @@ impl Hir {
317317 }
318318 }
319319
320- /// Build an HIR expression for `.`.
321- ///
322- /// A `.` expression matches any character except for a newline terminator.
323- /// To build an expression that matches any character, including newline
324- /// terminators, use the `any_char` method.
325- pub fn dot_char ( ) -> Hir {
326- let mut cls = ClassUnicode :: empty ( ) ;
327- cls. push ( ClassUnicodeRange :: new ( '\0' , '\x09' ) ) ;
328- cls. push ( ClassUnicodeRange :: new ( '\x0B' , '\u{10FFFF}' ) ) ;
329- Hir :: class ( Class :: Unicode ( cls) )
330- }
331-
332- /// Build an HIR expression for `(?-u:.)`.
333- ///
334- /// A non-Unicode `.` expression matches any byte except for a newline
335- /// terminator. To build an expression that matches any byte, including
336- /// newline terminators, use the `any_byte` method.
337- pub fn dot_byte ( ) -> Hir {
338- let mut cls = ClassBytes :: empty ( ) ;
339- cls. push ( ClassBytesRange :: new ( b'\0' , b'\x09' ) ) ;
340- cls. push ( ClassBytesRange :: new ( b'\x0B' , b'\xFF' ) ) ;
341- Hir :: class ( Class :: Bytes ( cls) )
342- }
343-
344- /// Build an HIR expression for `(?s:.)`.
345- ///
346- /// A `(?s:.)` expression matches any character, including newline
347- /// terminators. To build an expression that matches any character except
348- /// for newline terminators, use the `dot_char` method.
349- ///
350- /// Note that `(?s:)` is equivalent to `\p{any}`.
351- pub fn any_char ( ) -> Hir {
352- let mut cls = ClassUnicode :: empty ( ) ;
353- cls. push ( ClassUnicodeRange :: new ( '\0' , '\u{10FFFF}' ) ) ;
354- Hir :: class ( Class :: Unicode ( cls) )
355- }
356-
357- /// Build an HIR expression for `(?s-u:.)`.
358- ///
359- /// A `(?s-u:.)` expression matches any byte, including newline terminators.
360- /// To build an expression that matches any byte except for newline
361- /// terminators, use the `dot_byte` method.
362- ///
363- /// Note that `(?s-u:.)` is equivalent to `(?-u:[\x00-\xFF])`.
364- pub fn any_byte ( ) -> Hir {
365- let mut cls = ClassBytes :: empty ( ) ;
366- cls. push ( ClassBytesRange :: new ( b'\0' , b'\xFF' ) ) ;
367- Hir :: class ( Class :: Bytes ( cls) )
320+ /// Returns an HIR expression for `.`.
321+ ///
322+ /// * [`Dot::AnyChar`] maps to `(?su:.)`.
323+ /// * [`Dot::AnyByte`] maps to `(?s-u:.)`.
324+ /// * [`Dot::AnyCharExceptNL`] maps to `(?u-s:.)`.
325+ /// * [`Dot::AnyByteExceptNL`] maps to `(?-su:.)`.
326+ ///
327+ /// Note that this is a convenience routine for constructing the correct
328+ /// character class based on the value of `Dot`. There is no explicit "dot"
329+ /// HIR value. It is just an abbreviation for a common character class.
330+ pub fn dot ( dot : Dot ) -> Hir {
331+ match dot {
332+ Dot :: AnyChar => {
333+ let mut cls = ClassUnicode :: empty ( ) ;
334+ cls. push ( ClassUnicodeRange :: new ( '\0' , '\u{10FFFF}' ) ) ;
335+ Hir :: class ( Class :: Unicode ( cls) )
336+ }
337+ Dot :: AnyByte => {
338+ let mut cls = ClassBytes :: empty ( ) ;
339+ cls. push ( ClassBytesRange :: new ( b'\0' , b'\xFF' ) ) ;
340+ Hir :: class ( Class :: Bytes ( cls) )
341+ }
342+ Dot :: AnyCharExceptNL => {
343+ let mut cls = ClassUnicode :: empty ( ) ;
344+ cls. push ( ClassUnicodeRange :: new ( '\0' , '\x09' ) ) ;
345+ cls. push ( ClassUnicodeRange :: new ( '\x0B' , '\u{10FFFF}' ) ) ;
346+ Hir :: class ( Class :: Unicode ( cls) )
347+ }
348+ Dot :: AnyByteExceptNL => {
349+ let mut cls = ClassBytes :: empty ( ) ;
350+ cls. push ( ClassBytesRange :: new ( b'\0' , b'\x09' ) ) ;
351+ cls. push ( ClassBytesRange :: new ( b'\x0B' , b'\xFF' ) ) ;
352+ Hir :: class ( Class :: Bytes ( cls) )
353+ }
354+ }
368355 }
369356}
370357
@@ -1233,6 +1220,31 @@ impl Repetition {
12331220 }
12341221}
12351222
/// The flavors of `.` that can be turned into an HIR character class.
///
/// Pass one of these to [`Hir::dot`], a convenience routine that builds the
/// HIR value corresponding to that interpretation of the `.` regex.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum Dot {
    /// Any Unicode scalar value, matched via its UTF-8 encoding.
    ///
    /// Same as `(?su:.)`, which is also `\p{any}`.
    AnyChar,
    /// Any single byte value.
    ///
    /// Same as `(?s-u:.)`, which is also `(?-u:[\x00-\xFF])`.
    AnyByte,
    /// Any Unicode scalar value other than `\n`, matched via its UTF-8
    /// encoding.
    ///
    /// Same as `(?u-s:.)`, which is also `[\p{any}--\n]`.
    AnyCharExceptNL,
    /// Any single byte value other than `\n`.
    ///
    /// Same as `(?-su:.)`, which is also `(?-u:[[\x00-\xFF]--\n])`.
    AnyByteExceptNL,
}
1247+
12361248/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
12371249/// space but heap space proportional to the depth of the total `Hir`.
12381250impl Drop for Hir {
0 commit comments