|
11 | 11 | use std::cmp; |
12 | 12 | use symbol::Symbol; |
13 | 13 |
|
14 | | -/// To find the Levenshtein distance between two strings |
| 14 | +/// Find the Levenshtein distance between two strings |
15 | 15 | pub fn lev_distance(a: &str, b: &str) -> usize { |
16 | 16 | // cases which don't require further computation |
17 | 17 | if a.is_empty() { |
@@ -41,10 +41,12 @@ pub fn lev_distance(a: &str, b: &str) -> usize { |
41 | 41 | } dcol[t_last + 1] |
42 | 42 | } |
43 | 43 |
|
44 | | -/// To find the best match for a given string from an iterator of names |
| 44 | +/// Find the best match for a given word in the given iterator |
| 45 | +/// |
45 | 46 | /// As a loose rule to avoid obviously incorrect suggestions, it takes
46 | 47 | /// an optional limit for the maximum allowable edit distance, which defaults
47 | 48 | /// to one-third of the length of the given word.
| 49 | +/// |
48 | 50 | /// Besides Levenshtein, we use case-insensitive comparison to improve accuracy in the edge case
49 | 51 | /// of a lowercase/uppercase letter mismatch.
50 | 52 | pub fn find_best_match_for_name<'a, T>(iter_names: T, |
@@ -105,3 +107,39 @@ fn test_lev_distance() { |
105 | 107 | assert_eq!(lev_distance(b, c), 1); |
106 | 108 | assert_eq!(lev_distance(c, b), 1); |
107 | 109 | } |
| 110 | + |
| 111 | +#[test] |
| 112 | +fn test_find_best_match_for_name() { |
| 113 | + use with_globals; |
| 114 | + with_globals(|| { |
| 115 | + let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")]; |
| 116 | + assert_eq!( |
| 117 | + find_best_match_for_name(input.iter(), "aaaa", None), |
| 118 | + Some(Symbol::intern("aaab")) |
| 119 | + ); |
| 120 | + |
| 121 | + assert_eq!( |
| 122 | + find_best_match_for_name(input.iter(), "1111111111", None), |
| 123 | + None |
| 124 | + ); |
| 125 | + |
| 126 | + let input = vec![Symbol::intern("aAAA")]; |
| 127 | + assert_eq!( |
| 128 | + find_best_match_for_name(input.iter(), "AAAA", None), |
| 129 | + Some(Symbol::intern("aAAA")) |
| 130 | + ); |
| 131 | + |
| 132 | + let input = vec![Symbol::intern("AAAA")]; |
| 133 | + // Returns None because `lev_distance` exceeds the default `max_dist` (one-third of the word's length)
| 134 | + assert_eq!( |
| 135 | + find_best_match_for_name(input.iter(), "aaaa", None), |
| 136 | + None |
| 137 | + ); |
| 138 | + |
| 139 | + let input = vec![Symbol::intern("AAAA")]; |
| 140 | + assert_eq!( |
| 141 | + find_best_match_for_name(input.iter(), "aaaa", Some(4)), |
| 142 | + Some(Symbol::intern("AAAA")) |
| 143 | + ); |
| 144 | + }) |
| 145 | +} |
0 commit comments