@@ -42,16 +42,33 @@ public class RatcliffObershelpTest {
4242 public final void testSimilarity () {
4343 System .out .println ("similarity" );
4444 RatcliffObershelp instance = new RatcliffObershelp ();
45+
46+ // test data from other algorithms
47+ // "My string" vs "My tsring"
48+ // Substrings:
49+ // "ring" ==> 4, "My " ==> 3, "s" ==> 1
50+ // Ratcliff-Obershelp = 2*(sum of substrings)/(length of s1 + length of s2)
51+ // = 2*(4 + 3 + 1) / (9 + 9)
52+ // = 16/18
53+ // = 0.888888
4554 assertEquals (
4655 0.888888 ,
4756 instance .similarity ("My string" , "My tsring" ),
4857 0.000001 );
49-
58+
59+ // test data from other algorithms
60+ // "My string" vs "My ntrisg"
61+ // Substrings:
62+ // "My " ==> 3, "tri" ==> 3, "g" ==> 1
63+ // Ratcliff-Obershelp = 2*(sum of substrings)/(length of s1 + length of s2)
64+ // = 2*(3 + 3 + 1) / (9 + 9)
65+ // = 14/18
66+ // = 0.777778
5067 assertEquals (
5168 0.777778 ,
5269 instance .similarity ("My string" , "My ntrisg" ),
5370 0.000001 );
54-
71+
5572 // test data from essay by Ilya Ilyankou
5673 // "Comparison of Jaro-Winkler and Ratcliff/Obershelp algorithms
5774 // in spell check"
@@ -61,15 +78,17 @@ public final void testSimilarity() {
6178 0.857 ,
6279 instance .similarity ("MATEMATICA" , "MATHEMATICS" ),
6380 0.001 );
64-
65- // these following test data were based on stringmetric
81+
82+ // test data from stringmetric
6683 // https://github.com/rockymadden/stringmetric
6784 // expected output is 0.7368421052631579
6885 assertEquals (
6986 0.736842 ,
7087 instance .similarity ("aleksander" , "alexandre" ),
7188 0.000001 );
72-
89+
90+ // test data from stringmetric
91+ // https://github.com/rockymadden/stringmetric
7392 // expected output is 0.6666666666666666
7493 assertEquals (
7594 0.666666 ,
@@ -84,6 +103,14 @@ public final void testSimilarity() {
84103 instance .similarity ("WIKIMEDIA" , "WIKIMANIA" ),
85104 0.000001 );
86105
106+ // test data from wikipedia
107+ // https://en.wikipedia.org/wiki/Gestalt_Pattern_Matching
108+ // expected output is 24/40 = 0.6
109+ assertEquals (
110+ 0.6 ,
111+ instance .similarity ("GESTALT PATTERN MATCHING" , "GESTALT PRACTICE" ),
112+ 0.000001 );
113+
87114 NullEmptyTests .testSimilarity (instance );
88115 }
89116
0 commit comments