Skip to content

Commit 31f95b8

Browse files
authored
Add files via upload
Added Ratcliff-Obershelp implementation, ported from .Net code by Ligi (https://github.com/dxpux)
1 parent eeb33dc commit 31f95b8

File tree

1 file changed

+130
-0
lines changed

1 file changed

+130
-0
lines changed
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* The MIT License
3+
*
4+
* Copyright 2015 Thibault Debatty.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in
14+
* all copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
* THE SOFTWARE.
23+
*/
24+
package info.debatty.java.stringsimilarity;
25+
26+
import info.debatty.java.stringsimilarity.interfaces.NormalizedStringSimilarity;
27+
import info.debatty.java.stringsimilarity.interfaces.NormalizedStringDistance;
28+
import java.util.*;
29+
30+
import net.jcip.annotations.Immutable;
31+
32+
/**
33+
* Ratcliff/Obershelp pattern recognition
34+
* The Ratcliff/Obershelp algorithm computes the similarity of two strings as
35+
* the doubled number of matching characters divided by the total number of
36+
* characters in the two strings. Matching characters are those in the longest
37+
* common substring plus, recursively, matching characters in the unmatched
38+
* region on either side of the longest common substring.
39+
* The Ratcliff/Obershelp distance is computed as 1 - Ratcliff/Obershelp similarity.
40+
*
41+
* @author Ligi https://github.com/dxpux (as a patch for fuzzystring)
42+
* Ported to java from .net by denmase
43+
*/
44+
@Immutable
45+
public class RatcliffObershelp implements
46+
NormalizedStringSimilarity, NormalizedStringDistance {
47+
48+
/**
49+
* Compute the Ratcliff-Obershelp similarity between strings.
50+
*
51+
* @param s1 The first string to compare.
52+
* @param s2 The second string to compare.
53+
* @return The RatcliffObershelp similarity in the range [0, 1]
54+
* @throws NullPointerException if s1 or s2 is null.
55+
*/
56+
public final double similarity(String source, String target) {
57+
if (source == null) {
58+
throw new NullPointerException("source must not be null");
59+
}
60+
61+
if (target == null) {
62+
throw new NullPointerException("target must not be null");
63+
}
64+
65+
if (source.equals(target)) {
66+
return 1;
67+
}
68+
69+
List<String> matches; // = new ArrayList<>();
70+
matches = getMatchQueue(source, target);
71+
int sumOfMatches = 0;
72+
Iterator it;
73+
it = matches.iterator();
74+
75+
// Display element by element using Iterator
76+
while (it.hasNext()) {
77+
String element = it.next().toString();
78+
//System.out.println(element);
79+
sumOfMatches += element.length();
80+
}
81+
return 2.0d * sumOfMatches / (source.length() + target.length());
82+
}
83+
84+
/**
85+
* Return 1 - similarity.
86+
*
87+
* @param s1 The first string to compare.
88+
* @param s2 The second string to compare.
89+
* @return 1 - similarity
90+
* @throws NullPointerException if s1 or s2 is null.
91+
*/
92+
public final double distance(final String s1, final String s2) {
93+
return 1.0 - similarity(s1, s2);
94+
}
95+
96+
private static List<String> getMatchQueue(String source, String target) {
97+
List<String> list = new ArrayList<>();
98+
String match = frontMaxMatch(source, target);
99+
if (match.length() > 0) {
100+
String frontSource = source.substring(0, source.indexOf(match));
101+
String frontTarget = target.substring(0, target.indexOf(match));
102+
List<String> frontQueue = getMatchQueue(frontSource, frontTarget);
103+
104+
String endSource = source.substring(source.indexOf(match) + match.length());
105+
String endTarget = target.substring(target.indexOf(match) + match.length());
106+
List<String> endQueue = getMatchQueue(endSource, endTarget);
107+
108+
list.add(match);
109+
list.addAll(frontQueue);
110+
list.addAll(endQueue);
111+
}
112+
return list;
113+
}
114+
115+
private static String frontMaxMatch(String firstString, String secondString) {
116+
int longest = 0;
117+
String longestSubstring = "";
118+
119+
for (int i = 0; i < firstString.length(); ++i) {
120+
for (int j = i + 1; j <= firstString.length(); ++j) {
121+
String substring = firstString.substring(i, j);
122+
if (secondString.contains(substring) && substring.length() > longest) {
123+
longest = substring.length();
124+
longestSubstring = substring;
125+
}
126+
}
127+
}
128+
return longestSubstring;
129+
}
130+
}

0 commit comments

Comments
 (0)