1+ /*
2+ * The MIT License
3+ *
4+ * Copyright 2015 Thibault Debatty.
5+ *
6+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7+ * of this software and associated documentation files (the "Software"), to deal
8+ * in the Software without restriction, including without limitation the rights
9+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+ * copies of the Software, and to permit persons to whom the Software is
11+ * furnished to do so, subject to the following conditions:
12+ *
13+ * The above copyright notice and this permission notice shall be included in
14+ * all copies or substantial portions of the Software.
15+ *
16+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+ * THE SOFTWARE.
23+ */
24+ package info .debatty .java .stringsimilarity ;
25+
26+ import info .debatty .java .stringsimilarity .interfaces .NormalizedStringSimilarity ;
27+ import info .debatty .java .stringsimilarity .interfaces .NormalizedStringDistance ;
28+ import java .util .*;
29+
30+ import net .jcip .annotations .Immutable ;
31+
32+ /**
33+ * Ratcliff/Obershelp pattern recognition
34+ * The Ratcliff/Obershelp algorithm computes the similarity of two strings a
35+ * the doubled number of matching characters divided by the total number of
36+ * characters in the two strings. Matching characters are those in the longest
37+ * common subsequence plus, recursively, matching characters in the unmatched
38+ * region on either side of the longest common subsequence.
39+ * The Ratcliff/Obershelp distance is computed as 1 - Ratcliff/Obershelp similarity.
40+ *
41+ * @author Ligi https://github.com/dxpux (as a patch for fuzzystring)
42+ * Ported to java from .net by denmase
43+ */
44+ @ Immutable
45+ public class RatcliffObershelp implements
46+ NormalizedStringSimilarity , NormalizedStringDistance {
47+
48+ /**
49+ * Compute the Ratcliff-Obershelp similarity between strings.
50+ *
51+ * @param s1 The first string to compare.
52+ * @param s2 The second string to compare.
53+ * @return The RatcliffObershelp similarity in the range [0, 1]
54+ * @throws NullPointerException if s1 or s2 is null.
55+ */
56+ public final double similarity (String source , String target ) {
57+ if (source == null ) {
58+ throw new NullPointerException ("source must not be null" );
59+ }
60+
61+ if (target == null ) {
62+ throw new NullPointerException ("target must not be null" );
63+ }
64+
65+ if (source .equals (target )) {
66+ return 1 ;
67+ }
68+
69+ List <String > matches ; // = new ArrayList<>();
70+ matches = getMatchQueue (source , target );
71+ int sumOfMatches = 0 ;
72+ Iterator it ;
73+ it = matches .iterator ();
74+
75+ // Display element by element using Iterator
76+ while (it .hasNext ()) {
77+ String element = it .next ().toString ();
78+ //System.out.println(element);
79+ sumOfMatches += element .length ();
80+ }
81+ return 2.0d * sumOfMatches / (source .length () + target .length ());
82+ }
83+
84+ /**
85+ * Return 1 - similarity.
86+ *
87+ * @param s1 The first string to compare.
88+ * @param s2 The second string to compare.
89+ * @return 1 - similarity
90+ * @throws NullPointerException if s1 or s2 is null.
91+ */
92+ public final double distance (final String s1 , final String s2 ) {
93+ return 1.0 - similarity (s1 , s2 );
94+ }
95+
96+ private static List <String > getMatchQueue (String source , String target ) {
97+ List <String > list = new ArrayList <>();
98+ String match = frontMaxMatch (source , target );
99+ if (match .length () > 0 ) {
100+ String frontSource = source .substring (0 , source .indexOf (match ));
101+ String frontTarget = target .substring (0 , target .indexOf (match ));
102+ List <String > frontQueue = getMatchQueue (frontSource , frontTarget );
103+
104+ String endSource = source .substring (source .indexOf (match ) + match .length ());
105+ String endTarget = target .substring (target .indexOf (match ) + match .length ());
106+ List <String > endQueue = getMatchQueue (endSource , endTarget );
107+
108+ list .add (match );
109+ list .addAll (frontQueue );
110+ list .addAll (endQueue );
111+ }
112+ return list ;
113+ }
114+
115+ private static String frontMaxMatch (String firstString , String secondString ) {
116+ int longest = 0 ;
117+ String longestSubstring = "" ;
118+
119+ for (int i = 0 ; i < firstString .length (); ++i ) {
120+ for (int j = i + 1 ; j <= firstString .length (); ++j ) {
121+ String substring = firstString .substring (i , j );
122+ if (secondString .contains (substring ) && substring .length () > longest ) {
123+ longest = substring .length ();
124+ longestSubstring = substring ;
125+ }
126+ }
127+ }
128+ return longestSubstring ;
129+ }
130+ }
0 commit comments