@@ -24,21 +24,35 @@ declare_lint! {
2424 crate_level_only
2525}
2626
27- declare_lint_pass ! ( NonAsciiIdents => [ NON_ASCII_IDENTS , UNCOMMON_CODEPOINTS , CONFUSABLE_IDENTS ] ) ;
// Declares the `MIXED_SCRIPT_CONFUSABLES` lint with level `Warn` and the
// `crate_level_only` flag; the string literal is its description text.
27+ declare_lint ! {
28+ pub MIXED_SCRIPT_CONFUSABLES ,
29+ Warn ,
30+ "detects Unicode scripts whose mixed script confusables codepoints are solely used" ,
31+ crate_level_only
32+ }
33+
// Declares the `NonAsciiIdents` lint pass and lists the four lints it owns,
// `MIXED_SCRIPT_CONFUSABLES` being the one added by this change.
34+ declare_lint_pass ! ( NonAsciiIdents => [ NON_ASCII_IDENTS , UNCOMMON_CODEPOINTS , CONFUSABLE_IDENTS , MIXED_SCRIPT_CONFUSABLES ] ) ;
2835
2936impl EarlyLintPass for NonAsciiIdents {
3037 fn check_crate ( & mut self , cx : & EarlyContext < ' _ > , _: & ast:: Crate ) {
3138 use rustc_session:: lint:: Level ;
3239 use rustc_span:: Span ;
40+ use std:: collections:: BTreeMap ;
3341 use unicode_security:: GeneralSecurityProfile ;
3442 use utils:: CowBoxSymStr ;
3543
3644 let check_non_ascii_idents = cx. builder . lint_level ( NON_ASCII_IDENTS ) . 0 != Level :: Allow ;
3745 let check_uncommon_codepoints =
3846 cx. builder . lint_level ( UNCOMMON_CODEPOINTS ) . 0 != Level :: Allow ;
3947 let check_confusable_idents = cx. builder . lint_level ( CONFUSABLE_IDENTS ) . 0 != Level :: Allow ;
48+ let check_mixed_script_confusables =
49+ cx. builder . lint_level ( MIXED_SCRIPT_CONFUSABLES ) . 0 != Level :: Allow ;
4050
41- if !check_non_ascii_idents && !check_uncommon_codepoints && !check_confusable_idents {
51+ if !check_non_ascii_idents
52+ && !check_uncommon_codepoints
53+ && !check_confusable_idents
54+ && !check_mixed_script_confusables
55+ {
4256 return ;
4357 }
4458
@@ -107,6 +121,115 @@ impl EarlyLintPass for NonAsciiIdents {
107121 . or_insert ( ( symbol_str, sp, is_ascii) ) ;
108122 }
109123 }
124+
// Mixed-script confusables check. Runs only when `has_non_ascii_idents` and
// `check_mixed_script_confusables` are both true. Every non-ASCII,
// identifier-allowed character is grouped by its `AugmentedScriptSet`; a set
// is reported when all of its observed characters are potential mixed-script
// confusables and no verified set overlaps it.
// NOTE(review): `has_non_ascii_idents` and `symbols` are defined earlier in
// this function, outside the visible hunk; confirm their exact meaning there.
125+ if has_non_ascii_idents && check_mixed_script_confusables {
126+ use unicode_security:: is_potential_mixed_script_confusable_char;
127+ use unicode_security:: mixed_script:: AugmentedScriptSet ;
128+
// Per-script-set state.
// `Suspicious`: only potential confusables seen so far; holds those
// characters and the span captured when the set was first inserted.
// `Verified`: a non-confusable character of this set was seen, or the set
// was pre-seeded as verified.
129+ #[ derive( Clone ) ]
130+ enum ScriptSetUsage {
131+ Suspicious ( Vec < char > , Span ) ,
132+ Verified ,
133+ }
134+
135+ let mut script_states: FxHashMap < AugmentedScriptSet , ScriptSetUsage > =
136+ FxHashMap :: default ( ) ;
// Pre-seed the script set of the letter A as verified, so it is never
// reported and also takes part in the overlap filter further down.
137+ let latin_augmented_script_set = AugmentedScriptSet :: for_char ( 'A' ) ;
138+ script_states. insert ( latin_augmented_script_set, ScriptSetUsage :: Verified ) ;
139+
// Set to true whenever a `Suspicious` entry is created. It stays true even if
// that entry is later flipped to `Verified`; the report loop skips those.
// NOTE(review): `has_suspicous` is a misspelling of has_suspicious; the name
// is left unchanged here.
140+ let mut has_suspicous = false ;
// presumably `symbols` maps each identifier symbol to a span - TODO confirm
// against the part of the function outside this hunk.
141+ for ( symbol, & sp) in symbols. iter ( ) {
142+ let symbol_str = symbol. as_str ( ) ;
143+ for ch in symbol_str. chars ( ) {
144+ if ch. is_ascii ( ) {
145+ // all ascii characters are covered by exception.
146+ continue ;
147+ }
148+ if !GeneralSecurityProfile :: identifier_allowed ( ch) {
149+ // this character is covered by `uncommon_codepoints` lint.
150+ continue ;
151+ }
152+ let augmented_script_set = AugmentedScriptSet :: for_char ( ch) ;
// State machine per script set:
// - existing `Suspicious` entry: a confusable character is appended to the
//   list (duplicates are removed later); a non-confusable one flips the
//   entry to `Verified`.
// - existing `Verified` entry: left untouched, it is never downgraded.
// - no entry yet: insert `Verified` for a non-confusable character, else
//   start a `Suspicious` list carrying the span of the current symbol.
153+ script_states
154+ . entry ( augmented_script_set)
155+ . and_modify ( |existing_state| {
156+ if let ScriptSetUsage :: Suspicious ( ch_list, _) = existing_state {
157+ if is_potential_mixed_script_confusable_char ( ch) {
158+ ch_list. push ( ch) ;
159+ } else {
160+ * existing_state = ScriptSetUsage :: Verified ;
161+ }
162+ }
163+ } )
164+ . or_insert_with ( || {
165+ if !is_potential_mixed_script_confusable_char ( ch) {
166+ ScriptSetUsage :: Verified
167+ } else {
168+ has_suspicous = true ;
169+ ScriptSetUsage :: Suspicious ( vec ! [ ch] , sp)
170+ }
171+ } ) ;
172+ }
173+ }
174+
175+ if has_suspicous {
// Copy out the keys of all verified entries first, because the loop below
// consumes `script_states` by value.
176+ let verified_augmented_script_sets = script_states
177+ . iter ( )
178+ . flat_map ( |( k, v) | match v {
179+ ScriptSetUsage :: Verified => Some ( * k) ,
180+ _ => None ,
181+ } )
182+ . collect :: < Vec < _ > > ( ) ;
183+
184+ // we're sorting the output here.
// (A BTreeMap iterates in key order, so reports come out ordered by span and
// then by character list rather than in hash-map order.)
185+ let mut lint_reports: BTreeMap < ( Span , Vec < char > ) , AugmentedScriptSet > =
186+ BTreeMap :: new ( ) ;
187+
// For each entry still `Suspicious`:
// 1. skip it when its set reports `is_all()` (presumably the set of
//    characters usable with every script - confirm in unicode-security docs);
// 2. skip it when some verified set, itself not `is_all()`, intersects it in
//    a result that is neither empty nor `is_all()`;
// 3. otherwise sort and dedup its characters and queue a report keyed by
//    (span, characters).
188+ ' outerloop: for ( augment_script_set, usage) in script_states {
189+ let ( mut ch_list, sp) = match usage {
190+ ScriptSetUsage :: Verified => continue ,
191+ ScriptSetUsage :: Suspicious ( ch_list, sp) => ( ch_list, sp) ,
192+ } ;
193+
194+ if augment_script_set. is_all ( ) {
195+ continue ;
196+ }
197+
198+ for existing in verified_augmented_script_sets. iter ( ) {
199+ if existing. is_all ( ) {
200+ continue ;
201+ }
202+ let mut intersect = * existing;
203+ intersect. intersect_with ( augment_script_set) ;
204+ if !intersect. is_empty ( ) && !intersect. is_all ( ) {
205+ continue ' outerloop;
206+ }
207+ }
208+
209+ ch_list. sort ( ) ;
210+ ch_list. dedup ( ) ;
211+ lint_reports. insert ( ( sp, ch_list) , augment_script_set) ;
212+ }
213+
// Emit one `MIXED_SCRIPT_CONFUSABLES` diagnostic per queued report at the
// stored span. The first note lists every character together with its code
// point in U+XXXX form, comma separated and ending with a period.
214+ for ( ( sp, ch_list) , script_set) in lint_reports {
215+ cx. struct_span_lint ( MIXED_SCRIPT_CONFUSABLES , sp, |lint| {
216+ let message = format ! (
217+ "The usage of Script Group `{}` in this crate consists solely of mixed script confusables" ,
218+ script_set) ;
219+ let mut note = "The usage includes " . to_string ( ) ;
220+ for ( idx, ch) in ch_list. into_iter ( ) . enumerate ( ) {
221+ if idx != 0 {
222+ note += ", " ;
223+ }
224+ let char_info = format ! ( "'{}' (U+{:04X})" , ch, ch as u32 ) ;
225+ note += & char_info;
226+ }
227+ note += "." ;
228+ lint. build ( & message) . note ( & note) . note ( "Please recheck to make sure their usages are indeed what you want." ) . emit ( )
229+ } ) ;
230+ }
231+ }
232+ }
110233 }
111234}
112235
0 commit comments