Skip to content

Commit de272cf

Browse files
committed
reimplemented Kendall Tau distance in O(n lg n) time by counting inversions with modified mergesort
1 parent 9217c51 commit de272cf

File tree

4 files changed

+73
-18
lines changed

4 files changed

+73
-18
lines changed

docs/api/org/cicirello/permutations/distance/KendallTauDistance.html

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
<!-- NewPage -->
33
<html lang="en">
44
<head>
5-
<!-- Generated by javadoc (1.8.0_05) on Wed Aug 08 13:08:41 EDT 2018 -->
5+
<!-- Generated by javadoc (1.8.0_05) on Fri Aug 17 14:25:52 EDT 2018 -->
66
<title>KendallTauDistance (JavaPermutationTools - A Java API for computation on permutations)</title>
7-
<meta name="date" content="2018-08-08">
7+
<meta name="date" content="2018-08-17">
88
<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
99
<script type="text/javascript" src="../../../../script.js"></script>
1010
</head>
@@ -124,7 +124,7 @@ <h2 title="Class KendallTauDistance" class="title">Class KendallTauDistance</h2>
124124
<p>Kendall Tau distance is sometimes also known as bubble sort distance, as it is
125125
the number of adjacent swaps necessary to transform one permutation into the other.</p>
126126

127-
<p>Another way of describing it is the number of pairs of elements whose order is opposite
127+
<p>Another way of describing it is the number of pairs of elements whose order is inverted
128128
in one permutation relative to the other.</p>
129129

130130
<p>For example, consider p1 = [0, 1, 2, 3, 4] and p2 = [0, 3, 2, 1, 4].
@@ -136,7 +136,8 @@ <h2 title="Class KendallTauDistance" class="title">Class KendallTauDistance</h2>
136136

137137
<p>Kendall originally normalized the distance, but more recently many do not. Our implementation does not normalize.</p>
138138

139-
<p>Runtime: O(n^2), where n is the permutation length.</p>
139+
<p>Runtime: O(n lg n), where n is the permutation length. This runtime is achieved using a modified version of
140+
mergesort to count the inversions.</p>
140141

141142
<p>Kendall Tau distance was originally described in:<br>
142143
M. G. Kendall, "A new measure of rank correlation," Biometrika, vol. 30, no. 1/2, pp. 81–93, June 1938.</p></div>

lib/jpt1.jar

261 Bytes
Binary file not shown.

src/org/cicirello/permutations/distance/KendallTauDistance.java

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,15 @@
2121
package org.cicirello.permutations.distance;
2222

2323
import org.cicirello.permutations.Permutation;
24+
import java.util.Arrays;
25+
2426
/**
2527
* Kendall Tau Distance:
2628
*
2729
* <p>Kendall Tau distance is sometimes also known as bubble sort distance, as it is
2830
* the number of adjacent swaps necessary to transform one permutation into the other.</p>
2931
*
30-
* <p>Another way of describing it is the number of pairs of elements whose order is opposite
32+
* <p>Another way of describing it is the number of pairs of elements whose order is inverted
3133
* in one permutation relative to the other.</p>
3234
*
3335
* <p>For example, consider p1 = [0, 1, 2, 3, 4] and p2 = [0, 3, 2, 1, 4].
@@ -39,39 +41,71 @@
3941
*
4042
* <p>Kendall originally normalized the distance, but more recently many do not. Our implementation does not normalize.</p>
4143
*
42-
* <p>Runtime: O(n^2), where n is the permutation length.</p>
44+
* <p>Runtime: O(n lg n), where n is the permutation length. This runtime is achieved using a modified version of
45+
* mergesort to count the inversions.</p>
4346
*
4447
* <p>Kendall Tau distance was originally described in:<br>
4548
* M. G. Kendall, "A new measure of rank correlation," Biometrika, vol. 30, no. 1/2, pp. 81–93, June 1938.</p>
4649
*
4750
* @author <a href=https://www.cicirello.org/ target=_top>Vincent A. Cicirello</a>, <a href=https://www.cicirello.org/ target=_top>https://www.cicirello.org/</a>
48-
* @version 2.18.8.2
51+
* @version 2.18.8.17
4952
* @since 1.0
5053
*
5154
*/
5255
public class KendallTauDistance extends AbstractPermutationDistanceMeasurer {
5356

54-
55-
5657
/**
5758
* {@inheritDoc}
5859
*/
5960
@Override
6061
public int distance(Permutation p1, Permutation p2) {
61-
int count = 0;
62-
int L1 = p1.length();
63-
62+
63+
int n = p2.length();
64+
65+
// Use the inverse of p1 as a relabeling: each element of p2 is replaced below by
// invP1[element]. Presumably getInverse() maps an element to its index in p1
// (confirm against Permutation), in which case p1 itself relabels to 0, 1, ..., n-1.
6466
int[] invP1 = p1.getInverse();
65-
int[] invP2 = p2.getInverse();
6667

67-
for (int i = 0; i < L1-1; i++) {
68-
for (int j = i+1; j < L1; j++) {
69-
if ((invP1[i]-invP1[j])*(invP2[i]-invP2[j]) < 0) count++;
68+
// Build a relabeled array copy of p2; the array is handed to countInversions,
// which sorts it in place, so p2 itself is never modified here.
69+
int[] arrayP2 = new int[n];
70+
for (int i = 0; i < n; i++) {
71+
arrayP2[i] = invP1[p2.get(i)];
72+
}
// The distance is the number of inversions in the relabeled copy of p2.
73+
return countInversions(arrayP2);
74+
}
75+
76+
/**
 * Counts the inversions in an array using a modified mergesort.
 *
 * <p>The array is split into two halves (copied with Arrays.copyOfRange), the inversions
 * within each half are counted recursively, and the halves are then merged back into
 * the given array while counting the inversions that span the two halves.</p>
 *
 * <p>Side effect: the merge writes back into the given array, so on return its
 * elements are in ascending order.</p>
 *
 * <p>Equal elements are counted as inversions, because the merge takes from the left
 * half only when its element is strictly smaller.</p>
 *
 * <p>NOTE(review): the count is an int; an array of length n has at most n(n-1)/2 inversions,
 * which exceeds Integer.MAX_VALUE once n is greater than 65536.</p>
 *
 * @param array the array whose inversions are counted; it is sorted in place
 * @return the number of inversions counted; 0 if the array has at most one element
 */
private int countInversions(int[] array) {
77+
if (array.length <= 1) return 0;
78+
int m = array.length / 2;
79+
int[] left = Arrays.copyOfRange(array, 0, m);
80+
int[] right = Arrays.copyOfRange(array, m, array.length);
81+
int count = countInversions(left) + countInversions(right);
82+
int i = 0;
83+
int j = 0;
84+
int k = 0;
85+
while (i < left.length && j < right.length) {
86+
if (left[i] < right[j]) {
87+
array[k] = left[i];
88+
i++;
89+
k++;
90+
} else {
91+
// right[j] is placed ahead of every element still remaining in the left half
// (indices i through left.length - 1), so each of those pairs is one inversion
92+
count += (left.length - i);
93+
array[k] = right[j];
94+
j++;
95+
k++;
7096
}
7197
}
72-
98+
// copy whatever remains of the left half; no further inversions are counted here
while (i < left.length) {
99+
array[k] = left[i];
100+
i++;
101+
k++;
102+
}
103+
// copy whatever remains of the right half
while (j < right.length) {
104+
array[k] = right[j];
105+
j++;
106+
k++;
107+
}
73108
return count;
74109
}
75110

76-
77111
}

tests/org/cicirello/permutations/distance/PermutationDistanceTests.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,26 @@ public void testKendallTauDistance() {
243243
expected = 2*n-3;
244244
assertEquals("end points swapped", expected, d.distance(p,copy));
245245
}
246+
Permutation p = new Permutation(6);
247+
for (Permutation q : p) {
248+
assertEquals("checking consistence with naive implementation", naiveKendalTau(p,q), d.distance(p,q));
249+
}
250+
}
251+
252+
/**
 * Reference implementation of Kendall Tau distance that examines every pair of
 * indices directly (two nested loops over the length of p1); the test compares
 * its result against the distance computed by the class under test.
 *
 * @param p1 the first permutation
 * @param p2 the second permutation; indexed up to p1.length(), so presumably expected
 *           to have the same length as p1
 * @return the number of index pairs (i, j), i &lt; j, for which the inverses of p1 and p2
 *         order i and j oppositely
 */
private int naiveKendalTau(Permutation p1, Permutation p2) {
253+
int count = 0;
254+
int L1 = p1.length();
255+
256+
int[] invP1 = p1.getInverse();
257+
int[] invP2 = p2.getInverse();
258+
259+
for (int i = 0; i < L1-1; i++) {
260+
for (int j = i+1; j < L1; j++) {
261+
// a negative product means the two differences have opposite signs,
// i.e. i and j are ordered one way in invP1 and the other way in invP2
if ((invP1[i]-invP1[j])*(invP2[i]-invP2[j]) < 0) count++;
262+
}
263+
}
264+
265+
return count;
246266
}
247267

248268
@Test

0 commit comments

Comments
 (0)