@@ -11,16 +11,21 @@ use std::cmp;
1111mod tests;
1212
1313/// Finds the Levenshtein distance between two strings.
14- pub fn lev_distance ( a : & str , b : & str ) -> usize {
15- // cases which don't require further computation
16- if a. is_empty ( ) {
17- return b. chars ( ) . count ( ) ;
18- } else if b. is_empty ( ) {
19- return a. chars ( ) . count ( ) ;
14+ ///
15+ /// Returns None if the distance exceeds the limit.
16+ pub fn lev_distance ( a : & str , b : & str , limit : usize ) -> Option < usize > {
17+ let n = a. chars ( ) . count ( ) ;
18+ let m = b. chars ( ) . count ( ) ;
19+ let min_dist = if n < m { m - n } else { n - m } ;
20+
21+ if min_dist > limit {
22+ return None ;
23+ }
24+ if n == 0 || m == 0 {
25+ return ( min_dist <= limit) . then_some ( min_dist) ;
2026 }
2127
22- let mut dcol: Vec < _ > = ( 0 ..=b. len ( ) ) . collect ( ) ;
23- let mut t_last = 0 ;
28+ let mut dcol: Vec < _ > = ( 0 ..=m) . collect ( ) ;
2429
2530 for ( i, sc) in a. chars ( ) . enumerate ( ) {
2631 let mut current = i;
@@ -35,10 +40,10 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
3540 dcol[ j + 1 ] = cmp:: min ( dcol[ j + 1 ] , dcol[ j] ) + 1 ;
3641 }
3742 current = next;
38- t_last = j;
3943 }
4044 }
41- dcol[ t_last + 1 ]
45+
46+ ( dcol[ m] <= limit) . then_some ( dcol[ m] )
4247}
4348
4449/// Finds the best match for a given word in the given iterator.
@@ -51,39 +56,38 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
5156/// on an edge case with a lower(upper)case letters mismatch.
5257#[ cold]
5358pub fn find_best_match_for_name (
54- name_vec : & [ Symbol ] ,
59+ candidates : & [ Symbol ] ,
5560 lookup : Symbol ,
5661 dist : Option < usize > ,
5762) -> Option < Symbol > {
5863 let lookup = lookup. as_str ( ) ;
59- let max_dist = dist . unwrap_or_else ( || cmp :: max ( lookup. len ( ) , 3 ) / 3 ) ;
64+ let lookup_uppercase = lookup. to_uppercase ( ) ;
6065
6166 // Priority of matches:
6267 // 1. Exact case insensitive match
6368 // 2. Levenshtein distance match
6469 // 3. Sorted word match
65- if let Some ( case_insensitive_match) =
66- name_vec. iter ( ) . find ( |candidate| candidate. as_str ( ) . to_uppercase ( ) == lookup. to_uppercase ( ) )
67- {
68- return Some ( * case_insensitive_match) ;
70+ if let Some ( c) = candidates. iter ( ) . find ( |c| c. as_str ( ) . to_uppercase ( ) == lookup_uppercase) {
71+ return Some ( * c) ;
6972 }
70- let levenshtein_match = name_vec
71- . iter ( )
72- . filter_map ( |& name| {
73- let dist = lev_distance ( lookup, name. as_str ( ) ) ;
74- if dist <= max_dist { Some ( ( name, dist) ) } else { None }
75- } )
76- // Here we are collecting the next structure:
77- // (levenshtein_match, levenshtein_distance)
78- . fold ( None , |result, ( candidate, dist) | match result {
79- None => Some ( ( candidate, dist) ) ,
80- Some ( ( c, d) ) => Some ( if dist < d { ( candidate, dist) } else { ( c, d) } ) ,
81- } ) ;
82- if levenshtein_match. is_some ( ) {
83- levenshtein_match. map ( |( candidate, _) | candidate)
84- } else {
85- find_match_by_sorted_words ( name_vec, lookup)
73+
74+ let mut dist = dist. unwrap_or_else ( || cmp:: max ( lookup. len ( ) , 3 ) / 3 ) ;
75+ let mut best = None ;
76+ for c in candidates {
77+ match lev_distance ( lookup, c. as_str ( ) , dist) {
78+ Some ( 0 ) => return Some ( * c) ,
79+ Some ( d) => {
80+ dist = d - 1 ;
81+ best = Some ( * c) ;
82+ }
83+ None => { }
84+ }
8685 }
86+ if best. is_some ( ) {
87+ return best;
88+ }
89+
90+ find_match_by_sorted_words ( candidates, lookup)
8791}
8892
8993fn find_match_by_sorted_words ( iter_names : & [ Symbol ] , lookup : & str ) -> Option < Symbol > {
0 commit comments