Skip to content

Commit b517f00

Browse files
committed
Better handling of null/empty values, refactor tests. #28
1 parent d8ef94f commit b517f00

File tree

16 files changed

+156
-197
lines changed

16 files changed

+156
-197
lines changed

src/main/java/info/debatty/java/stringsimilarity/ShingleBased.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
import java.util.HashMap;
2828
import java.util.Map;
2929
import java.util.regex.Pattern;
30+
31+
import info.debatty.java.utils.NullEmptyUtil;
3032
import net.jcip.annotations.Immutable;
3133

3234
/**
@@ -103,6 +105,10 @@ public int getK() {
103105
public final Map<String, Integer> getProfile(final String string) {
104106
HashMap<String, Integer> shingles = new HashMap<String, Integer>();
105107

108+
if (NullEmptyUtil.isNullOrEmpty(string)) {
109+
return Collections.unmodifiableMap(shingles);
110+
}
111+
106112
String string_no_space = SPACE_REG.matcher(string).replaceAll(" ");
107113
for (int i = 0; i < (string_no_space.length() - k + 1); i++) {
108114
String shingle = string_no_space.substring(i, i + k);

src/main/java/info/debatty/java/utils/NullEmptyUtil.java

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,12 @@ public final class NullEmptyUtil {
1111
* @return Returns a value if either string is empty or null, or null if both strings have a value.
1212
*/
1313
public static Double normalizedSimilarity(String s1, String s2) {
14-
if (s1 == null && s2 == null) {
15-
return 1d;
16-
} else if (s1 == null || s2 == null) {
17-
return 0d;
18-
} else if (s1.equals("") && s2.equals("")) {
14+
boolean s1empty = isNullOrEmpty(s1),
15+
s2empty = isNullOrEmpty(s2);
16+
17+
if (s1empty && s2empty) {
1918
return 1d;
20-
} else if (s1.equals("") || s2.equals("")) {
19+
} else if (s1empty || s2empty) {
2120
return 0d;
2221
}
2322

@@ -43,14 +42,21 @@ public static Double normalizedDistance(String s1, String s2) {
4342
* @return Returns a value if either string is empty or null, or null if both strings have a value.
4443
*/
4544
public static Double lengthDistance(String s1, String s2) {
46-
if (s1 == null && s2 == null) {
45+
boolean s1empty = isNullOrEmpty(s1),
46+
s2empty = isNullOrEmpty(s2);
47+
48+
if (s1empty && s2empty) {
4749
return 0d;
48-
} else if (s1 == null || s1.equals("")) {
50+
} else if (s1empty) {
4951
return (double) s2.length();
50-
} else if (s2 == null || s2.equals("")) {
52+
} else if (s2empty) {
5153
return (double) s1.length();
5254
}
5355

5456
return null;
5557
}
58+
59+
public static boolean isNullOrEmpty(String s) {
60+
return s == null || s.equals("");
61+
}
5662
}

src/test/java/info/debatty/java/stringsimilarity/CosineTest.java

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import java.io.IOException;
2929
import java.io.InputStream;
3030
import java.io.InputStreamReader;
31+
32+
import info.debatty.java.stringsimilarity.testutil.NullEmptyTests;
3133
import org.junit.Test;
3234
import static org.junit.Assert.*;
3335

@@ -46,6 +48,8 @@ public final void testSimilarity() {
4648
Cosine instance = new Cosine();
4749
double result = instance.similarity("ABC", "ABCE");
4850
assertEquals(0.71, result, 0.01);
51+
52+
NullEmptyTests.testSimilarity(instance);
4953
}
5054

5155
/**
@@ -74,25 +78,11 @@ public final void testLargeString() throws IOException {
7478
}
7579

7680
@Test
77-
public final void testEmptyStrings() {
81+
public final void testDistance() {
7882
Cosine instance = new Cosine();
79-
assertEquals(1.0, instance.similarity("", ""), 0.1);
80-
assertEquals(0.0, instance.distance("", ""), 0.1);
81-
assertEquals(0.0, instance.similarity("", "foo"), 0.1);
82-
assertEquals(0.0, instance.similarity("foo", ""), 0.1);
83-
assertEquals(1.0, instance.distance("", "foo"), 0.1);
84-
assertEquals(1.0, instance.distance("foo", ""), 0.1);
85-
}
83+
NullEmptyTests.testDistance(instance);
8684

87-
@Test
88-
public final void testNullStrings() {
89-
Cosine instance = new Cosine();
90-
assertEquals(1.0, instance.similarity((String)null, null), 0.1);
91-
assertEquals(0.0, instance.distance(null, null), 0.1);
92-
assertEquals(0.0, instance.similarity(null, "foo"), 0.1);
93-
assertEquals(0.0, instance.similarity("foo", null), 0.1);
94-
assertEquals(1.0, instance.distance(null, "foo"), 0.1);
95-
assertEquals(1.0, instance.distance("foo", null), 0.1);
85+
// TODO: regular (non-null/empty) distance tests
9686
}
9787

9888
private static String readResourceFile(String file) throws IOException {

src/test/java/info/debatty/java/stringsimilarity/DamerauTest.java

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
package info.debatty.java.stringsimilarity;
2626

27+
import info.debatty.java.stringsimilarity.testutil.NullEmptyTests;
2728
import org.junit.Test;
2829
import static org.junit.Assert.assertEquals;
2930

@@ -43,21 +44,6 @@ public final void testDistance() {
4344
assertEquals(1.0, instance.distance("ABCDEF", "ABDCEF"), 0.0);
4445
assertEquals(2.0, instance.distance("ABCDEF", "BACDFE"), 0.0);
4546
assertEquals(1.0, instance.distance("ABCDEF", "ABCDE"), 0.0);
46-
}
47-
48-
@Test
49-
public final void testEmptyStrings() {
50-
Damerau instance = new Damerau();
51-
assertEquals(0.0, instance.distance("", ""), 0.1);
52-
assertEquals(3.0, instance.distance("", "foo"), 0.1);
53-
assertEquals(3.0, instance.distance("foo", ""), 0.1);
54-
}
55-
56-
@Test
57-
public final void testNullStrings() {
58-
Damerau instance = new Damerau();
59-
assertEquals(0.0, instance.distance(null, null), 0.1);
60-
assertEquals(3.0, instance.distance(null, "foo"), 0.1);
61-
assertEquals(3.0, instance.distance("foo", null), 0.1);
47+
NullEmptyTests.testDistance(instance);
6248
}
6349
}

src/test/java/info/debatty/java/stringsimilarity/JaccardTest.java

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,10 @@
2424

2525
package info.debatty.java.stringsimilarity;
2626

27-
import org.junit.After;
28-
import org.junit.AfterClass;
29-
import org.junit.Before;
30-
import org.junit.BeforeClass;
27+
import info.debatty.java.stringsimilarity.testutil.NullEmptyTests;
3128
import org.junit.Test;
32-
import static org.junit.Assert.*;
29+
30+
import static org.junit.Assert.assertEquals;
3331

3432
/**
3533
*
@@ -51,6 +49,8 @@ public void testSimilarity() {
5149
// => 3 / 5 = 0.6
5250
double result = instance.similarity("ABCDE", "ABCDF");
5351
assertEquals(0.6, result, 0.0);
52+
53+
NullEmptyTests.testSimilarity(instance);
5454
}
5555

5656
/**
@@ -65,27 +65,7 @@ public void testDistance() {
6565
double expResult = 0.4;
6666
double result = instance.distance("ABCDE", "ABCDF");
6767
assertEquals(expResult, result, 0.0);
68-
}
6968

70-
@Test
71-
public final void testEmptyStrings() {
72-
Jaccard instance = new Jaccard();
73-
assertEquals(1.0, instance.similarity("", ""), 0.1);
74-
assertEquals(0.0, instance.distance("", ""), 0.1);
75-
assertEquals(0.0, instance.similarity("", "foo"), 0.1);
76-
assertEquals(0.0, instance.similarity("foo", ""), 0.1);
77-
assertEquals(1.0, instance.distance("", "foo"), 0.1);
78-
assertEquals(1.0, instance.distance("foo", ""), 0.1);
79-
}
80-
81-
@Test
82-
public final void testNullStrings() {
83-
Jaccard instance = new Jaccard();
84-
assertEquals(1.0, instance.similarity(null, null), 0.1);
85-
assertEquals(0.0, instance.distance(null, null), 0.1);
86-
assertEquals(0.0, instance.similarity(null, "foo"), 0.1);
87-
assertEquals(0.0, instance.similarity("foo", null), 0.1);
88-
assertEquals(1.0, instance.distance(null, "foo"), 0.1);
89-
assertEquals(1.0, instance.distance("foo", null), 0.1);
69+
NullEmptyTests.testDistance(instance);
9070
}
9171
}

src/test/java/info/debatty/java/stringsimilarity/JaroWinklerTest.java

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
package info.debatty.java.stringsimilarity;
2626

27+
import info.debatty.java.stringsimilarity.testutil.NullEmptyTests;
2728
import org.junit.Test;
2829
import static org.junit.Assert.*;
2930

@@ -50,27 +51,15 @@ public final void testSimilarity() {
5051
0.896296,
5152
instance.similarity("My string", "My ntrisg"),
5253
0.000001);
53-
}
5454

55-
@Test
56-
public final void testEmptyStrings() {
57-
JaroWinkler instance = new JaroWinkler();
58-
assertEquals(1.0, instance.similarity("", ""), 0.1);
59-
assertEquals(0.0, instance.distance("", ""), 0.1);
60-
assertEquals(0.0, instance.similarity("", "foo"), 0.1);
61-
assertEquals(0.0, instance.similarity("foo", ""), 0.1);
62-
assertEquals(1.0, instance.distance("", "foo"), 0.1);
63-
assertEquals(1.0, instance.distance("foo", ""), 0.1);
55+
NullEmptyTests.testSimilarity(instance);
6456
}
6557

6658
@Test
67-
public final void testNullStrings() {
59+
public final void testDistance() {
6860
JaroWinkler instance = new JaroWinkler();
69-
assertEquals(1.0, instance.similarity(null, null), 0.1);
70-
assertEquals(0.0, instance.distance(null, null), 0.1);
71-
assertEquals(0.0, instance.similarity(null, "foo"), 0.1);
72-
assertEquals(0.0, instance.similarity("foo", null), 0.1);
73-
assertEquals(1.0, instance.distance(null, "foo"), 0.1);
74-
assertEquals(1.0, instance.distance("foo", null), 0.1);
61+
NullEmptyTests.testDistance(instance);
62+
63+
// TODO: regular (non-null/empty) distance tests
7564
}
7665
}

src/test/java/info/debatty/java/stringsimilarity/LevenshteinTest.java

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
package info.debatty.java.stringsimilarity;
2626

27+
import info.debatty.java.stringsimilarity.testutil.NullEmptyTests;
2728
import org.junit.Test;
2829
import static org.junit.Assert.assertEquals;
2930

@@ -43,21 +44,7 @@ public final void testDistance() {
4344
assertEquals(1.0, instance.distance("My string", "My tring"), 0.0);
4445
assertEquals(2.0, instance.distance("My string", "M string2"), 0.0);
4546
assertEquals(1.0, instance.distance("My string", "My $tring"), 0.0);
46-
}
4747

48-
@Test
49-
public final void testEmptyStrings() {
50-
Levenshtein instance = new Levenshtein();
51-
assertEquals(0.0, instance.distance("", ""), 0.1);
52-
assertEquals(3.0, instance.distance("", "foo"), 0.1);
53-
assertEquals(3.0, instance.distance("foo", ""), 0.1);
54-
}
55-
56-
@Test
57-
public final void testNullStrings() {
58-
Levenshtein instance = new Levenshtein();
59-
assertEquals(0.0, instance.distance(null, null), 0.1);
60-
assertEquals(3.0, instance.distance(null, "foo"), 0.1);
61-
assertEquals(3.0, instance.distance("foo", null), 0.1);
48+
NullEmptyTests.testDistance(instance);
6249
}
6350
}

src/test/java/info/debatty/java/stringsimilarity/LongestCommonSubsequenceTest.java

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,10 @@
2323
*/
2424
package info.debatty.java.stringsimilarity;
2525

26-
import org.junit.After;
27-
import org.junit.AfterClass;
28-
import org.junit.Before;
29-
import org.junit.BeforeClass;
26+
import info.debatty.java.stringsimilarity.testutil.NullEmptyTests;
3027
import org.junit.Test;
31-
import static org.junit.Assert.*;
28+
29+
import static org.junit.Assert.assertEquals;
3230

3331
/**
3432
*
@@ -47,21 +45,7 @@ public void testDistance() {
4745
assertEquals(4, instance.distance("AGCAT", "GAC"), 0.0);
4846

4947
assertEquals(1, instance.distance("AGCAT", "AGCT"), 0.0);
50-
}
51-
52-
@Test
53-
public final void testEmptyStrings() {
54-
LongestCommonSubsequence instance = new LongestCommonSubsequence();
55-
assertEquals(0.0, instance.distance("", ""), 0.1);
56-
assertEquals(3.0, instance.distance("", "foo"), 0.1);
57-
assertEquals(3.0, instance.distance("foo", ""), 0.1);
58-
}
5948

60-
@Test
61-
public final void testNullStrings() {
62-
LongestCommonSubsequence instance = new LongestCommonSubsequence();
63-
assertEquals(0.0, instance.distance(null, null), 0.1);
64-
assertEquals(3.0, instance.distance(null, "foo"), 0.1);
65-
assertEquals(3.0, instance.distance("foo", null), 0.1);
49+
NullEmptyTests.testDistance(instance);
6650
}
6751
}

src/test/java/info/debatty/java/stringsimilarity/MetricLCSTest.java

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,16 @@
11
package info.debatty.java.stringsimilarity;
22

3+
import info.debatty.java.stringsimilarity.testutil.NullEmptyTests;
34
import org.junit.Test;
45

56
import static org.junit.Assert.*;
67

78
public class MetricLCSTest {
89
@Test
9-
public final void testEmptyStrings() {
10+
public final void testDistance() {
1011
MetricLCS instance = new MetricLCS();
11-
assertEquals(0.0, instance.distance("", ""), 0.1);
12-
assertEquals(1.0, instance.distance("", "foo"), 0.1);
13-
assertEquals(1.0, instance.distance("foo", ""), 0.1);
14-
}
12+
NullEmptyTests.testDistance(instance);
1513

16-
@Test
17-
public final void testNullStrings() {
18-
MetricLCS instance = new MetricLCS();
19-
assertEquals(0.0, instance.distance(null, null), 0.1);
20-
assertEquals(1.0, instance.distance(null, "foo"), 0.1);
21-
assertEquals(1.0, instance.distance("foo", null), 0.1);
14+
// TODO: regular (non-null/empty) distance tests
2215
}
2316
}

src/test/java/info/debatty/java/stringsimilarity/NGramTest.java

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
package info.debatty.java.stringsimilarity;
2626

27+
import info.debatty.java.stringsimilarity.testutil.NullEmptyTests;
2728
import org.junit.Assert;
2829
import static org.junit.Assert.assertEquals;
2930
import org.junit.Test;
@@ -50,23 +51,7 @@ public void testDistance() {
5051

5152
assertEquals(0.0, ngram.distance("SIJK", "SIJK"), 0.0);
5253
assertEquals(0.0, ngram.distance("S", "S"), 0.0);
53-
}
5454

55-
@Test
56-
public final void testEmptyStrings() {
57-
NGram instance = new NGram();
58-
assertEquals(0.0, instance.distance("", ""), 0.1);
59-
assertEquals(1.0, instance.distance("", "foo"), 0.1);
60-
assertEquals(1.0, instance.distance("foo", ""), 0.1);
61-
assertEquals(1.0, instance.distance("", "S"), 0.0);
62-
assertEquals(1.0, instance.distance("", "SIJK"), 0.0);
63-
}
64-
65-
@Test
66-
public final void testNullStrings() {
67-
NGram instance = new NGram();
68-
assertEquals(0.0, instance.distance(null, null), 0.1);
69-
assertEquals(1.0, instance.distance(null, "foo"), 0.1);
70-
assertEquals(1.0, instance.distance("foo", null), 0.1);
55+
NullEmptyTests.testDistance(ngram);
7156
}
7257
}

0 commit comments

Comments
 (0)