@@ -221,6 +221,7 @@ impl core::fmt::Debug for ShortFileName {
221221}
222222
223223/// Used to store a Long File Name
224+ #[ derive( Debug ) ]
224225pub struct LfnBuffer < ' a > {
225226 /// We fill this buffer in from the back
226227 inner : & ' a mut [ u8 ] ,
@@ -230,6 +231,8 @@ pub struct LfnBuffer<'a> {
230231 free : usize ,
231232 /// Did we overflow?
232233 overflow : bool ,
234+ /// If a surrogate-pair is split over two directory entries, remember half of it here.
235+ unpaired_surrogate : Option < u16 > ,
233236}
234237
235238impl < ' a > LfnBuffer < ' a > {
@@ -240,19 +243,34 @@ impl<'a> LfnBuffer<'a> {
240243 inner : storage,
241244 free : len,
242245 overflow : false ,
246+ unpaired_surrogate : None ,
243247 }
244248 }
245249
246250 /// Empty out this buffer
247251 pub fn clear ( & mut self ) {
248252 self . free = self . inner . len ( ) ;
249253 self . overflow = false ;
254+ self . unpaired_surrogate = None ;
250255 }
251256
252- /// Push the 13 UCS-2 characters into this string
257+ /// Push the 13 UTF-16 code units into this string.
253258 ///
254259 /// We assume they are pushed last-chunk-first, as you would find
255260 /// them on disk.
261+ ///
262+ /// If a chunk starts with an unpaired half of a surrogate pair, that half is saved and joined to the end of the chunk given in the next call.
263+ ///
264+ /// ```text
265+ /// [de00, 002e, 0074, 0078, 0074, 0000, ffff, ffff, ffff, ffff, ffff, ffff, ffff]
266+ /// [0041, 0042, 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, 0038, 0039, d83d]
267+ ///
268+ /// Would map to
269+ ///
270+ /// 0041 0042 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 1f600 002e 0074 0078 0074, or
271+ ///
272+ /// "AB0123456789😀.txt"
273+ /// ```
256274 pub fn push ( & mut self , buffer : & [ u16 ; 13 ] ) {
257275 // find the first null, if any
258276 let null_idx = buffer
@@ -261,25 +279,70 @@ impl<'a> LfnBuffer<'a> {
261279 . unwrap_or ( buffer. len ( ) ) ;
262280 // take all the wide chars, up to the null (or go to the end)
263281 let buffer = & buffer[ 0 ..null_idx] ;
264- for ch in buffer. iter ( ) . rev ( ) {
265- let ch = char:: from_u32 ( * ch as u32 ) . unwrap_or ( '?' ) ;
282+
283+ // This next part will convert the 16-bit values into chars, noting that
284+ // chars outside the Basic Multilingual Plane will require two 16-bit
285+ // values to encode (see UTF-16 Surrogate Pairs).
286+ //
287+ // We cache the decoded chars into this array so we can iterate them
288+ // backwards. It's 60 bytes, but it'll have to do.
289+ let mut char_vec: heapless:: Vec < char , 13 > = heapless:: Vec :: new ( ) ;
290+ // Now do the decode, including the unpaired surrogate (if any) from
291+ // last time (maybe it has a pair now!)
292+ let mut is_first = true ;
293+ for ch in char:: decode_utf16 (
294+ buffer
295+ . iter ( )
296+ . cloned ( )
297+ . chain ( self . unpaired_surrogate . take ( ) . iter ( ) . cloned ( ) ) ,
298+ ) {
299+ match ch {
300+ Ok ( ch) => {
301+ char_vec. push ( ch) . expect ( "Vec was full!?" ) ;
302+ }
303+ Err ( e) => {
304+ // OK, so we found half a surrogate pair and nothing to go
305+ // with it. Was this the first codepoint in the chunk?
306+ if is_first {
307+ // it was - the other half is probably in the next chunk
308+ // so save this for next time
309+ trace ! ( "LFN saved {:?}" , e. unpaired_surrogate( ) ) ;
310+ self . unpaired_surrogate = Some ( e. unpaired_surrogate ( ) ) ;
311+ } else {
312+ // it wasn't - we can't deal with these mid-sequence, so
313+ // replace it
314+ trace ! ( "LFN replaced {:?}" , e. unpaired_surrogate( ) ) ;
315+ char_vec. push ( '\u{fffd}' ) . expect ( "Vec was full?!" ) ;
316+ }
317+ }
318+ }
319+ is_first = false ;
320+ }
321+
322+ for ch in char_vec. iter ( ) . rev ( ) {
266323 trace ! ( "LFN push {:?}" , ch) ;
267- let mut ch_bytes = [ 0u8 ; 4 ] ;
268- // a buffer of length 4 is always enough
269- let ch_str = ch. encode_utf8 ( & mut ch_bytes) ;
270- if self . free < ch_str. len ( ) {
324+ // a buffer of length 4 is enough to encode any char
325+ let mut encoded_ch = [ 0u8 ; 4 ] ;
326+ let encoded_ch = ch. encode_utf8 ( & mut encoded_ch) ;
327+ if self . free < encoded_ch. len ( ) {
328+ // the LFN buffer they gave us was not long enough. Note for
329+ // later, so we don't show them garbage.
271330 self . overflow = true ;
272331 return ;
273332 }
274- // store the encoded character in the buffer, working backwards
275- for b in ch_str. bytes ( ) . rev ( ) {
333+ // Store the encoded char in the buffer, working backwards. We
334+ // already checked there was enough space.
335+ for b in encoded_ch. bytes ( ) . rev ( ) {
276336 self . free -= 1 ;
277337 self . inner [ self . free ] = b;
278338 }
279339 }
280340 }
281341
282342 /// View this LFN buffer as a string-slice
343+ ///
344+ /// If the buffer overflowed while parsing the LFN, or if this buffer is
345+ /// empty, you get an empty string.
283346 pub fn as_str ( & self ) -> & str {
284347 if self . overflow {
285348 ""
@@ -418,6 +481,22 @@ mod test {
418481 ] ) ;
419482 assert_eq ! ( buf. as_str( ) , "ABCDEFGHIJKLM0123∂" ) ;
420483 }
484+
#[test]
fn two_piece_split_surrogate() {
    // Chunks are pushed last-chunk-first. The tail chunk opens with the low
    // surrogate 0xde00, whose partner is the final value of the head chunk.
    let tail: [u16; 13] = [
        0xde00, 0x002e, 0x0074, 0x0078, 0x0074, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
        0xffff, 0xffff,
    ];
    // The head chunk opens with a complete pair (0xd83d, 0xde00) and closes
    // with the high surrogate 0xd83d that pairs with the tail chunk's first value.
    let head: [u16; 13] = [
        0xd83d, 0xde00, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038,
        0x0039, 0xd83d,
    ];

    let mut storage = [0u8; 64];
    let mut buf: LfnBuffer = LfnBuffer::new(&mut storage);

    buf.push(&tail);
    buf.push(&head);

    assert_eq!(buf.as_str(), "😀0123456789😀.txt");
}
421500}
422501
423502// ****************************************************************************
0 commit comments