11use crate :: { EarlyContext , EarlyLintPass , LintContext } ;
22use rustc_ast:: ast;
33use rustc_data_structures:: fx:: FxHashMap ;
4- use rustc_span:: symbol:: { Ident , SymbolStr } ;
5- use std:: hash:: { Hash , Hasher } ;
6- use std:: ops:: Deref ;
4+ use rustc_span:: symbol:: SymbolStr ;
75
86declare_lint ! {
97 pub NON_ASCII_IDENTS ,
@@ -19,158 +17,133 @@ declare_lint! {
1917 crate_level_only
2018}
2119
22- // FIXME: Change this to warn.
// Declares the `CONFUSABLE_IDENTS` lint, whose description is
// "detects visually confusable pairs between identifiers".
2320declare_lint ! {
2421 pub CONFUSABLE_IDENTS ,
// Default level: this diff replaces `Allow` (the `-` line) with `Warn` (the `+` line).
25- Allow ,
22+ Warn ,
2623 "detects visually confusable pairs between identifiers" ,
2724 crate_level_only
2825}
2926
// Declares the `NonAsciiIdents` lint pass together with the three lints listed for it.
3027declare_lint_pass ! ( NonAsciiIdents => [ NON_ASCII_IDENTS , UNCOMMON_CODEPOINTS , CONFUSABLE_IDENTS ] ) ;
3128
// Removed side of the diff; an enum with the same variants is added inside `mod utils`.
// A string that is either an interned `SymbolStr` or an owned `Box<str>`.
32- enum CowBoxSymStr {
33- Interned ( SymbolStr ) ,
34- Owned ( Box < str > ) ,
35- }
36-
// Removed side of the diff.
// Lets a `CowBoxSymStr` be read as a `str`, whichever variant it holds.
37- impl Deref for CowBoxSymStr {
38- type Target = str ;
39-
40- fn deref ( & self ) -> & str {
41- match self {
42- CowBoxSymStr :: Interned ( interned) => interned,
43- CowBoxSymStr :: Owned ( ref owned) => owned,
44- }
45- }
46- }
47-
// Removed side of the diff.
// Hashes the dereferenced `str`, so the hash depends only on the text and not on the variant.
48- impl Hash for CowBoxSymStr {
49- #[ inline]
50- fn hash < H : Hasher > ( & self , state : & mut H ) {
51- Hash :: hash ( & * * self , state)
52- }
53- }
54-
// Removed side of the diff.
// Compares the dereferenced `str` values, so an interned and an owned value with the same
// text are equal.
55- impl PartialEq < CowBoxSymStr > for CowBoxSymStr {
56- #[ inline]
57- fn eq ( & self , other : & CowBoxSymStr ) -> bool {
58- PartialEq :: eq ( & * * self , & * * other)
59- }
60- }
61-
// Removed side of the diff. Marker impl; equality itself is defined by the `PartialEq` impl.
62- impl Eq for CowBoxSymStr { }
63-
// Removed side of the diff (a by-reference variant is added inside `check_crate`).
// Clears `buffer`, fills it with `skeleton(&symbol_str)`, then:
//   - returns `Interned(symbol_str)` when the skeleton text equals the symbol itself;
//   - otherwise swaps the buffer contents out (leaving `buffer` as a fresh empty `String`)
//     and returns them as `Owned`.
64- fn calc_skeleton ( symbol_str : SymbolStr , buffer : & ' _ mut String ) -> CowBoxSymStr {
65- use std:: mem:: swap;
66- use unicode_security:: confusable_detection:: skeleton;
67- buffer. clear ( ) ;
68- buffer. extend ( skeleton ( & symbol_str) ) ;
69- if symbol_str == * buffer {
70- CowBoxSymStr :: Interned ( symbol_str)
71- } else {
72- let mut owned = String :: new ( ) ;
73- swap ( buffer, & mut owned) ;
74- CowBoxSymStr :: Owned ( owned. into_boxed_str ( ) )
75- }
76- }
77-
// Removed side of the diff.
// Returns true when `c` falls inside one of the inclusive code point ranges in the table
// below (0x00..=0x7f, 0xba, 0x2080); false otherwise.
78- fn is_in_ascii_confusable_closure ( c : char ) -> bool {
79- // FIXME: move this table to `unicode_security` crate.
80- // data here corresponds to Unicode 13.
81- const ASCII_CONFUSABLE_CLOSURE : & [ ( u64 , u64 ) ] = & [ ( 0x00 , 0x7f ) , ( 0xba , 0xba ) , ( 0x2080 , 0x2080 ) ] ;
82- let c = c as u64 ;
83- for & ( range_start, range_end) in ASCII_CONFUSABLE_CLOSURE {
84- if c >= range_start && c <= range_end {
85- return true ;
86- }
87- }
88- false
89- }
90-
// Removed side of the diff.
// Returns true when `c` is exactly one of the code points listed in the table below;
// false otherwise.
91- fn is_in_ascii_confusable_closure_relevant_list ( c : char ) -> bool {
92- // FIXME: move this table to `unicode_security` crate.
93- // data here corresponds to Unicode 13.
94- const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST : & [ u64 ] = & [
95- 0x22 , 0x25 , 0x27 , 0x2f , 0x30 , 0x31 , 0x49 , 0x4f , 0x60 , 0x6c , 0x6d , 0x6e , 0x72 , 0x7c , 0xba ,
96- 0x2080 ,
97- ] ;
98- let c = c as u64 ;
99- for & item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST {
100- if c == item {
101- return true ;
102- }
103- }
104- false
105- }
106-
// Lint pass implementation that walks the symbols stored in the session's symbol gallery and
// can emit `NON_ASCII_IDENTS`, `UNCOMMON_CODEPOINTS` and `CONFUSABLE_IDENTS`.
// Reading note: lines marked `-` are the previous implementation kept by the diff, lines
// marked `+` are its replacement, and lines with two fused numbers are unchanged context.
10729impl EarlyLintPass for NonAsciiIdents {
10830 fn check_crate ( & mut self , cx : & EarlyContext < ' _ > , _: & ast:: Crate ) {
10931 use rustc_session:: lint:: Level ;
110- if cx. builder . lint_level ( CONFUSABLE_IDENTS ) . 0 == Level :: Allow {
32+ use rustc_span:: Span ;
33+ use unicode_security:: GeneralSecurityProfile ;
34+ use utils:: CowBoxSymStr ;
35+
// A lint counts as enabled here when its level is anything other than `Allow`.
36+ let check_non_ascii_idents = cx. builder . lint_level ( NON_ASCII_IDENTS ) . 0 != Level :: Allow ;
37+ let check_uncommon_codepoints =
38+ cx. builder . lint_level ( UNCOMMON_CODEPOINTS ) . 0 != Level :: Allow ;
39+ let check_confusable_idents = cx. builder . lint_level ( CONFUSABLE_IDENTS ) . 0 != Level :: Allow ;
40+
// Nothing to do when all three lints are allowed.
41+ if !check_non_ascii_idents && !check_uncommon_codepoints && !check_confusable_idents {
11142 return ;
11243 }
44+
45+ let mut has_non_ascii_idents = false ;
11346 let symbols = cx. sess . parse_sess . symbol_gallery . symbols . lock ( ) ;
114- let mut symbol_strs_and_spans = Vec :: with_capacity ( symbols. len ( ) ) ;
115- let mut in_fast_path = true ;
116- for ( symbol, sp) in symbols. iter ( ) {
117- // fast path
// First pass: per-symbol checks for non-ASCII text and uncommon codepoints.
47+ for ( symbol, & sp) in symbols. iter ( ) {
11848 let symbol_str = symbol. as_str ( ) ;
119- if !symbol_str. chars ( ) . all ( is_in_ascii_confusable_closure) {
120- // fallback to slow path.
121- symbol_strs_and_spans. clear ( ) ;
122- in_fast_path = false ;
123- break ;
// Pure-ASCII symbols are skipped; neither per-symbol lint is emitted for them.
49+ if symbol_str. is_ascii ( ) {
50+ continue ;
12451 }
125- if symbol_str. chars ( ) . any ( is_in_ascii_confusable_closure_relevant_list) {
126- symbol_strs_and_spans. push ( ( symbol_str, * sp) ) ;
52+ has_non_ascii_idents = true ;
// This lint call is not guarded by `check_non_ascii_idents`; that flag only feeds the
// early return above.
53+ cx. struct_span_lint ( NON_ASCII_IDENTS , sp, |lint| {
54+ lint. build ( "identifier contains non-ASCII characters" ) . emit ( )
55+ } ) ;
// Reported when at least one character fails `GeneralSecurityProfile::identifier_allowed`.
56+ if check_uncommon_codepoints
57+ && !symbol_str. chars ( ) . all ( GeneralSecurityProfile :: identifier_allowed)
58+ {
59+ cx. struct_span_lint ( UNCOMMON_CODEPOINTS , sp, |lint| {
60+ lint. build ( "identifier contains uncommon Unicode codepoints" ) . emit ( )
61+ } )
12762 }
12863 }
129- if !in_fast_path {
130- // slow path
131- for ( symbol, sp) in symbols. iter ( ) {
64+
// Second pass: runs only when some symbol was non-ASCII and the confusable lint is enabled.
65+ if has_non_ascii_idents && check_confusable_idents {
// Maps each skeleton to a representative symbol: (symbol text, span, is_ascii).
66+ let mut skeleton_map: FxHashMap < CowBoxSymStr , ( SymbolStr , Span , bool ) > =
67+ FxHashMap :: with_capacity_and_hasher ( symbols. len ( ) , Default :: default ( ) ) ;
// Scratch buffer handed to `calc_skeleton` on every iteration.
68+ let mut str_buf = String :: new ( ) ;
69+ for ( symbol, & sp) in symbols. iter ( ) {
// Clears `buffer` and fills it with `skeleton(symbol_str)`. Returns a clone of the interned
// string when the skeleton equals the symbol text; otherwise moves the buffer contents out
// as an owned boxed string, leaving a fresh empty `String` in `buffer`.
70+ fn calc_skeleton ( symbol_str : & SymbolStr , buffer : & mut String ) -> CowBoxSymStr {
71+ use std:: mem:: replace;
72+ use unicode_security:: confusable_detection:: skeleton;
73+ buffer. clear ( ) ;
74+ buffer. extend ( skeleton ( symbol_str) ) ;
75+ if * symbol_str == * buffer {
76+ CowBoxSymStr :: Interned ( symbol_str. clone ( ) )
77+ } else {
78+ let owned = replace ( buffer, String :: new ( ) ) ;
79+ CowBoxSymStr :: Owned ( owned. into_boxed_str ( ) )
80+ }
81+ }
13282 let symbol_str = symbol. as_str ( ) ;
133- symbol_strs_and_spans. push ( ( symbol_str, * sp) ) ;
83+ let is_ascii = symbol_str. is_ascii ( ) ;
84+ let skeleton = calc_skeleton ( & symbol_str, & mut str_buf) ;
// `and_modify` runs when another symbol with the same skeleton is already stored;
// `or_insert` stores the current symbol when the skeleton has not been seen yet.
85+ skeleton_map
86+ . entry ( skeleton)
87+ . and_modify ( |( existing_symbolstr, existing_span, existing_is_ascii) | {
// Report the pair unless both the stored symbol and the current one are ASCII.
88+ if !* existing_is_ascii || !is_ascii {
89+ cx. struct_span_lint ( CONFUSABLE_IDENTS , sp, |lint| {
90+ lint. build ( & format ! (
91+ "identifier pair considered confusable between `{}` and `{}`" ,
92+ existing_symbolstr, symbol_str
93+ ) )
94+ . span_label (
95+ * existing_span,
96+ "this is where the previous identifier occurred" ,
97+ )
98+ . emit ( ) ;
99+ } ) ;
100+ }
// An ASCII representative is replaced by the first non-ASCII symbol sharing its skeleton;
// a non-ASCII representative is never replaced.
101+ if * existing_is_ascii && !is_ascii {
102+ * existing_symbolstr = symbol_str. clone ( ) ;
103+ * existing_span = sp;
104+ * existing_is_ascii = is_ascii;
105+ }
106+ } )
107+ . or_insert ( ( symbol_str, sp, is_ascii) ) ;
134108 }
135109 }
136- drop ( symbols) ;
137- symbol_strs_and_spans. sort_by_key ( |x| x. 0 . clone ( ) ) ;
138- let mut skeleton_map =
139- FxHashMap :: with_capacity_and_hasher ( symbol_strs_and_spans. len ( ) , Default :: default ( ) ) ;
140- let mut str_buf = String :: new ( ) ;
141- for ( symbol_str, sp) in symbol_strs_and_spans {
142- let skeleton = calc_skeleton ( symbol_str. clone ( ) , & mut str_buf) ;
143- skeleton_map
144- . entry ( skeleton)
145- . and_modify ( |( existing_symbolstr, existing_span) | {
146- cx. struct_span_lint ( CONFUSABLE_IDENTS , sp, |lint| {
147- lint. build ( & format ! (
148- "identifier pair considered confusable between `{}` and `{}`" ,
149- existing_symbolstr, symbol_str
150- ) )
151- . span_label (
152- * existing_span,
153- "this is where the previous identifier occurred" ,
154- )
155- . emit ( ) ;
156- } ) ;
157- } )
158- . or_insert ( ( symbol_str, sp) ) ;
110+ }
111+ }
112+
// Private helper module holding `CowBoxSymStr`, the key type of the skeleton map built in
// `check_crate`.
// Reading note: the lines marked `-` interleaved below belong to the removed `check_ident`
// method of the old `impl`, not to this module.
113+ mod utils {
114+ use rustc_span:: symbol:: SymbolStr ;
115+ use std:: hash:: { Hash , Hasher } ;
116+ use std:: ops:: Deref ;
117+
// A string that is either an interned `SymbolStr` or an owned `Box<str>`.
118+ pub ( super ) enum CowBoxSymStr {
119+ Interned ( SymbolStr ) ,
120+ Owned ( Box < str > ) ,
121+ }
122+
// Lets a `CowBoxSymStr` be read as a `str`, whichever variant it holds.
123+ impl Deref for CowBoxSymStr {
124+ type Target = str ;
125+
126+ fn deref ( & self ) -> & str {
127+ match self {
128+ CowBoxSymStr :: Interned ( interned) => interned,
129+ CowBoxSymStr :: Owned ( ref owned) => owned,
130+ }
159131 }
160132 }
161- fn check_ident ( & mut self , cx : & EarlyContext < ' _ > , ident : Ident ) {
162- use unicode_security :: GeneralSecurityProfile ;
163- let name_str = ident . name . as_str ( ) ;
164- if name_str . is_ascii ( ) {
165- return ;
133+
// Hashes the dereferenced `str`, so the hash depends only on the text and not on the variant.
134+ impl Hash for CowBoxSymStr {
135+ # [ inline ]
136+ fn hash < H : Hasher > ( & self , state : & mut H ) {
137+ Hash :: hash ( & * * self , state )
166138 }
167- cx. struct_span_lint ( NON_ASCII_IDENTS , ident. span , |lint| {
168- lint. build ( "identifier contains non-ASCII characters" ) . emit ( )
169- } ) ;
170- if !name_str. chars ( ) . all ( GeneralSecurityProfile :: identifier_allowed) {
171- cx. struct_span_lint ( UNCOMMON_CODEPOINTS , ident. span , |lint| {
172- lint. build ( "identifier contains uncommon Unicode codepoints" ) . emit ( )
173- } )
139+ }
140+
// Compares the dereferenced `str` values, so an interned and an owned value with the same
// text are equal.
141+ impl PartialEq < CowBoxSymStr > for CowBoxSymStr {
142+ #[ inline]
143+ fn eq ( & self , other : & CowBoxSymStr ) -> bool {
144+ PartialEq :: eq ( & * * self , & * * other)
174145 }
175146 }
147+
// Marker impl; equality itself is defined by the `PartialEq` impl above.
148+ impl Eq for CowBoxSymStr { }
176149}
0 commit comments