11use anyhow:: Context ;
22use ra_ap_ide_db:: line_index:: LineIndex ;
33use ra_ap_parser:: Edition ;
4+ use std:: borrow:: Cow ;
45mod archive;
56mod config;
67pub mod generated;
78mod translate;
89pub mod trap;
910use ra_ap_syntax:: ast:: SourceFile ;
10- use ra_ap_syntax:: AstNode ;
11+ use ra_ap_syntax:: { AstNode , SyntaxError , TextRange , TextSize } ;
12+
13+ fn from_utf8_lossy ( v : & [ u8 ] ) -> ( Cow < ' _ , str > , Option < SyntaxError > ) {
14+ let mut iter = v. utf8_chunks ( ) ;
15+ let ( first_valid, first_invalid) = if let Some ( chunk) = iter. next ( ) {
16+ let valid = chunk. valid ( ) ;
17+ let invalid = chunk. invalid ( ) ;
18+ if invalid. is_empty ( ) {
19+ debug_assert_eq ! ( valid. len( ) , v. len( ) ) ;
20+ return ( Cow :: Borrowed ( valid) , None ) ;
21+ }
22+ ( valid, invalid)
23+ } else {
24+ return ( Cow :: Borrowed ( "" ) , None ) ;
25+ } ;
26+
27+ const REPLACEMENT : & str = "\u{FFFD} " ;
28+ let error_start = first_valid. len ( ) as u32 ;
29+ let error_end = error_start + first_invalid. len ( ) as u32 ;
30+ let error_range = TextRange :: new ( TextSize :: new ( error_start) , TextSize :: new ( error_end) ) ;
31+ let error = SyntaxError :: new ( "invalid utf-8 sequence" . to_owned ( ) , error_range) ;
32+ let mut res = String :: with_capacity ( v. len ( ) ) ;
33+ res. push_str ( first_valid) ;
34+
35+ res. push_str ( REPLACEMENT ) ;
36+
37+ for chunk in iter {
38+ res. push_str ( chunk. valid ( ) ) ;
39+ if !chunk. invalid ( ) . is_empty ( ) {
40+ res. push_str ( REPLACEMENT ) ;
41+ }
42+ }
43+
44+ ( Cow :: Owned ( res) , Some ( error) )
45+ }
1146
1247fn extract (
1348 archiver : & archive:: Archiver ,
@@ -18,13 +53,15 @@ fn extract(
1853 let file = std:: fs:: canonicalize ( & file) . unwrap_or ( file) ;
1954 archiver. archive ( & file) ;
2055 let input = std:: fs:: read ( & file) ?;
21- let input = String :: from_utf8 ( input) ? ;
56+ let ( input, err ) = from_utf8_lossy ( & input) ;
2257 let line_index = LineIndex :: new ( & input) ;
2358 let display_path = file. to_string_lossy ( ) ;
2459 let mut trap = traps. create ( "source" , & file) ;
2560 let label = trap. emit_file ( & file) ;
2661 let mut translator = translate:: Translator :: new ( trap, label, line_index) ;
27-
62+ if let Some ( err) = err {
63+ translator. emit_parse_error ( display_path. as_ref ( ) , err) ;
64+ }
2865 let parse = ra_ap_syntax:: ast:: SourceFile :: parse ( & input, Edition :: CURRENT ) ;
2966 for err in parse. errors ( ) {
3067 translator. emit_parse_error ( display_path. as_ref ( ) , err) ;
0 commit comments