11package info .debatty .java .stringsimilarity ;
22
33import info .debatty .java .stringsimilarity .interfaces .NormalizedStringDistance ;
4+ import net .jcip .annotations .Immutable ;
45
56/**
67 * N-Gram Similarity as defined by Kondrak, "N-Gram Similarity and Distance",
1314 *
1415 * http://webdocs.cs.ualberta.ca/~kondrak/papers/spire05.pdf
1516 */
17+ @ Immutable
1618public class NGram implements NormalizedStringDistance {
1719
20+ private static final int DEFAULT_N = 2 ;
1821 private final int n ;
1922
20- public NGram (int n ) {
23+ /**
24+ * Instantiate with the given n-gram length.
25+ * @param n length of the n-grams; stored as-is, no validation is performed
26+ */
27+ public NGram (final int n ) {
2128 this .n = n ;
2229 }
2330
31+ /**
32+ * Instantiate with the default n-gram length, {@code DEFAULT_N} (2).
33+ */
2434 public NGram () {
25- this .n = 2 ;
35+ this .n = DEFAULT_N ;
2636 }
2737
28- @ Override
29- public double distance (String s0 , String s1 ) {
38+ /**
39+ * Compute n-gram distance.
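40+ * @param s0 first string to compare; must not be null
41+ * @param s1 second string to compare; must not be null
42+ * @return the n-gram distance between s0 and s1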
43+ */
44+ public final double distance (final String s0 , final String s1 ) {
3045 final char special = '\n' ;
3146 final int sl = s0 .length ();
3247 final int tl = s1 .length ();
@@ -50,9 +65,9 @@ public double distance(String s0, String s1) {
5065 }
5166
5267 char [] sa = new char [sl + n - 1 ];
53- float p [] ; //'previous' cost array, horizontally
54- float d [] ; // cost array, horizontally
55- float _d [] ; //placeholder to assist in swapping p and d
68+ float [] p ; //'previous' cost array, horizontally
69+ float [] d ; // cost array, horizontally
70+ float [] d2 ; //placeholder to assist in swapping p and d
5671
5772 //construct sa with prefix
5873 for (int i = 0 ; i < sa .length ; i ++) {
@@ -76,7 +91,7 @@ public double distance(String s0, String s1) {
7691 }
7792
7893 for (j = 1 ; j <= tl ; j ++) {
79- //construct t_j n-gram
94+ //construct t_j n-gram
8095 if (j < n ) {
8196 for (int ti = 0 ; ti < n - j ; ti ++) {
8297 t_j [ti ] = special ; //add prefix
@@ -95,18 +110,21 @@ public double distance(String s0, String s1) {
95110 for (int ni = 0 ; ni < n ; ni ++) {
96111 if (sa [i - 1 + ni ] != t_j [ni ]) {
97112 cost ++;
98- } else if (sa [i - 1 + ni ] == special ) { //discount matches on prefix
113+ } else if (sa [i - 1 + ni ] == special ) {
114+ //discount matches on prefix
99115 tn --;
100116 }
101117 }
102118 float ec = (float ) cost / tn ;
103- // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
104- d [i ] = Math .min (Math .min (d [i - 1 ] + 1 , p [i ] + 1 ), p [i - 1 ] + ec );
119+ // minimum of cell to the left+1, to the top+1,
120+ // diagonally left and up +cost
121+ d [i ] = Math .min (
122+ Math .min (d [i - 1 ] + 1 , p [i ] + 1 ), p [i - 1 ] + ec );
105123 }
106124 // copy current distance counts to 'previous row' distance counts
107- _d = p ;
125+ d2 = p ;
108126 p = d ;
109- d = _d ;
127+ d = d2 ;
110128 }
111129
112130 // our last action in the above loop was to switch d and p, so p now
0 commit comments