@@ -92,6 +92,9 @@ pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> {
9292 return None ;
9393 }
9494 let b1 = src[ 1 ] ;
95+ if 0b11_000000 & b1 != TAG_CONT {
96+ return None ;
97+ }
9598 let cp = ( ( b0 & !TAG_TWO ) as u32 ) << 6
9699 | ( ( b1 & !TAG_CONT ) as u32 ) ;
97100 match cp {
@@ -104,6 +107,12 @@ pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> {
104107 return None ;
105108 }
106109 let ( b1, b2) = ( src[ 1 ] , src[ 2 ] ) ;
110+ if 0b11_000000 & b1 != TAG_CONT {
111+ return None ;
112+ }
113+ if 0b11_000000 & b2 != TAG_CONT {
114+ return None ;
115+ }
107116 let cp = ( ( b0 & !TAG_THREE ) as u32 ) << 12
108117 | ( ( b1 & !TAG_CONT ) as u32 ) << 6
109118 | ( ( b2 & !TAG_CONT ) as u32 ) ;
@@ -118,6 +127,15 @@ pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> {
118127 return None ;
119128 }
120129 let ( b1, b2, b3) = ( src[ 1 ] , src[ 2 ] , src[ 3 ] ) ;
130+ if 0b11_000000 & b1 != TAG_CONT {
131+ return None ;
132+ }
133+ if 0b11_000000 & b2 != TAG_CONT {
134+ return None ;
135+ }
136+ if 0b11_000000 & b3 != TAG_CONT {
137+ return None ;
138+ }
121139 let cp = ( ( b0 & !TAG_FOUR ) as u32 ) << 18
122140 | ( ( b1 & !TAG_CONT ) as u32 ) << 12
123141 | ( ( b2 & !TAG_CONT ) as u32 ) << 6
@@ -236,6 +254,8 @@ mod tests {
236254 assert_eq ! ( decode_utf8( & [ 0xFF ] ) , None ) ;
237255 // Lone surrogate code point (U+D801)
238256 assert_eq ! ( decode_utf8( & [ 0xED , 0xA0 , 0x81 ] ) , None ) ;
257+ // Invalid continuation byte.
258+ assert_eq ! ( decode_utf8( & [ 0xD4 , 0xC2 ] ) , None ) ;
239259 // Bad lengths
240260 assert_eq ! ( decode_utf8( & [ 0xC3 ] ) , None ) ; // 2 bytes
241261 assert_eq ! ( decode_utf8( & [ 0xEF , 0xBF ] ) , None ) ; // 3 bytes
0 commit comments