package org.cicirello.permutations.distance;

import java.util.Arrays;

import org.cicirello.permutations.Permutation;

2426/**
2527 * Kendall Tau Distance:
2628 *
2729 * <p>Kendall Tau distance is sometimes also known as bubble sort distance, as it is
2830 * the number of adjacent swaps necessary to transform one permutation into the other.</p>
2931 *
30- * <p>Another way of describing it is the number of pairs of elements whose order is opposite
32+ * <p>Another way of describing it is the number of pairs of elements whose order is inverted
3133 * in one permutation relative to the other.</p>
3234 *
3335 * <p>For example, consider p1 = [0, 1, 2, 3, 4] and p2 = [0, 3, 2, 1, 4].
3941 *
4042 * <p>Kendall originally normalized the distance, but more recently many do not. Our implementation does not normalize.</p>
4143 *
42- * <p>Runtime: O(n^2), where n is the permutation length.</p>
44+ * <p>Runtime: O(n lg n), where n is the permutation length. This runtime is achieved using a modified version of
45+ * mergesort to count the inversions.</p>
4346 *
4447 * <p>Kendall Tau distance originally described in:<br>
4548 * M. G. Kendall, "A new measure of rank correlation," Biometrika, vol. 30, no. 1/2, pp. 81–93, June 1938.</p>
4649 *
4750 * @author <a href=https://www.cicirello.org/ target=_top>Vincent A. Cicirello</a>, <a href=https://www.cicirello.org/ target=_top>https://www.cicirello.org/</a>
48- * @version 2.18.8.2
51+ * @version 2.18.8.17
4952 * @since 1.0
5053 *
5154 */
5255public class KendallTauDistance extends AbstractPermutationDistanceMeasurer {
5356
54-
55-
5657 /**
5758 * {@inheritDoc}
5859 */
5960 @ Override
6061 public int distance (Permutation p1 , Permutation p2 ) {
61- int count = 0 ;
62- int L1 = p1 .length ();
63-
62+
63+ int n = p2 .length ();
64+
65+ // use inverse of p1 as a relabeling
6466 int [] invP1 = p1 .getInverse ();
65- int [] invP2 = p2 .getInverse ();
6667
67- for (int i = 0 ; i < L1 -1 ; i ++) {
68- for (int j = i +1 ; j < L1 ; j ++) {
69- if ((invP1 [i ]-invP1 [j ])*(invP2 [i ]-invP2 [j ]) < 0 ) count ++;
68+ // relabel array copy of p2
69+ int [] arrayP2 = new int [n ];
70+ for (int i = 0 ; i < n ; i ++) {
71+ arrayP2 [i ] = invP1 [p2 .get (i )];
72+ }
73+ return countInversions (arrayP2 );
74+ }
75+
76+ private int countInversions (int [] array ) {
77+ if (array .length <= 1 ) return 0 ;
78+ int m = array .length / 2 ;
79+ int [] left = Arrays .copyOfRange (array , 0 , m );
80+ int [] right = Arrays .copyOfRange (array , m , array .length );
81+ int count = countInversions (left ) + countInversions (right );
82+ int i = 0 ;
83+ int j = 0 ;
84+ int k = 0 ;
85+ while (i < left .length && j < right .length ) {
86+ if (left [i ] < right [j ]) {
87+ array [k ] = left [i ];
88+ i ++;
89+ k ++;
90+ } else {
91+ // inversions
92+ count += (left .length - i );
93+ array [k ] = right [j ];
94+ j ++;
95+ k ++;
7096 }
7197 }
72-
98+ while (i < left .length ) {
99+ array [k ] = left [i ];
100+ i ++;
101+ k ++;
102+ }
103+ while (j < right .length ) {
104+ array [k ] = right [j ];
105+ j ++;
106+ k ++;
107+ }
73108 return count ;
74109 }
75110
76-
77111}
0 commit comments