@@ -81,8 +81,7 @@ impl<'s> ScriptSource<'s> {
8181 let mut rest = source. content ;
8282
8383 // Whitespace may precede a frontmatter but must end with a newline
84- const WHITESPACE : [ char ; 4 ] = [ ' ' , '\t' , '\r' , '\n' ] ;
85- let trimmed = rest. trim_start_matches ( WHITESPACE ) ;
84+ let trimmed = rest. trim_start_matches ( is_whitespace) ;
8685 if trimmed. len ( ) != rest. len ( ) {
8786 let trimmed_len = rest. len ( ) - trimmed. len ( ) ;
8887 let last_trimmed_index = trimmed_len - 1 ;
@@ -116,7 +115,7 @@ impl<'s> ScriptSource<'s> {
116115 anyhow:: bail!( "no closing `{fence_pattern}` found for frontmatter" ) ;
117116 } ;
118117 let ( info, rest) = rest. split_at ( info_end_index) ;
119- let info = info. trim_matches ( WHITESPACE ) ;
118+ let info = info. trim_matches ( is_whitespace ) ;
120119 if !info. is_empty ( ) {
121120 source. info = Some ( info) ;
122121 }
@@ -134,7 +133,7 @@ impl<'s> ScriptSource<'s> {
134133 let rest = & rest[ frontmatter_nl + nl_fence_pattern. len ( ) ..] ;
135134
136135 let ( after_closing_fence, rest) = rest. split_once ( "\n " ) . unwrap_or ( ( rest, "" ) ) ;
137- let after_closing_fence = after_closing_fence. trim_matches ( WHITESPACE ) ;
136+ let after_closing_fence = after_closing_fence. trim_matches ( is_whitespace ) ;
138137 if !after_closing_fence. is_empty ( ) {
139138 // extra characters beyond the original fence pattern, even if they are extra `-`
140139 anyhow:: bail!( "trailing characters found after frontmatter close" ) ;
@@ -188,6 +187,40 @@ fn strip_shebang(input: &str) -> Option<usize> {
188187 None
189188}
190189
/// Reports whether `c` counts as whitespace under the Rust language definition.
///
/// The accepted characters are described in the
/// [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html).
///
/// Mirrors `is_whitespace` from rust-lang/rust's `compiler/rustc_lexer/src/lib.rs`.
fn is_whitespace(c: char) -> bool {
    // The accepted set is `Pattern_White_Space`. It is stable (it does not
    // change between Unicode versions), so the code points are hard-coded.
    match c {
        // ASCII controls: \t, \n, vertical tab, form feed, \r (contiguous range)
        '\u{0009}'..='\u{000D}' => true,
        // ASCII space
        '\u{0020}' => true,
        // NEXT LINE (from Latin-1)
        '\u{0085}' => true,
        // Bidi markers: LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
        '\u{200E}' | '\u{200F}' => true,
        // Dedicated Unicode separators: LINE SEPARATOR, PARAGRAPH SEPARATOR
        '\u{2028}' | '\u{2029}' => true,
        _ => false,
    }
}
223+
191224#[ cfg( test) ]
192225mod test_expand {
193226 use snapbox:: assert_data_eq;
0 commit comments