Initial commit: Kendall and Spearman correlation for ordered categoricals

pandeconscious · pandeconscious · commit 1f8c6280cb92 · 2025-10-23T10:46:48.000Z
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -11633,6 +11633,11 @@ def corr(
         data = self._get_numeric_data() if numeric_only else self
         cols = data.columns
         idx = cols.copy()
+
+        if method in ("spearman", "kendall"):
+            data = data._transform_ord_cat_cols_to_coded_cols()
+
+
         mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
 
         if method == "pearson":
@@ -11926,7 +11931,8 @@ def corrwith(
             correl = num / dom
 
         elif method in ["kendall", "spearman"] or callable(method):
-
+            left = left._transform_ord_cat_cols_to_coded_cols()
+            right = right._transform_ord_cat_cols_to_coded_cols()
             def c(x):
                 return nanops.nancorr(x[0], x[1], method=method)
 
@@ -11957,6 +11963,25 @@ def c(x):
 
         return correl
 
+    def _transform_ord_cat_cols_to_coded_cols(self) -> DataFrame:
+        """
+        Return a frame with ordered categorical columns replaced by their codes.
+
+        Missing values (code -1) become NaN; unordered categorical columns
+        and all other columns are left untouched.
+        ``self`` is returned as-is when there is nothing to convert.
+        """
+        dtypes = enumerate(self.dtypes)
+        locs = [i for i, dt in dtypes if dt == "category" and dt.ordered]
+        if not locs:
+            return self
+
+        data = self.copy(deep=False)
+        # Replace by position: label-based selection breaks on duplicate names.
+        for i in locs:
+            data.isetitem(i, data.iloc[:, i].cat.codes.replace(-1, np.nan))
+        return data
+
     # ----------------------------------------------------------------------
     # ndarray-like stats methods
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2687,6 +2687,12 @@ def corr(
         this, other = self.align(other, join="inner")
         if len(this) == 0:
             return np.nan
+    
+        if method in ("spearman", "kendall"):
+            if this.dtype == "category" and this.cat.ordered:
+                this = this.cat.codes.replace(-1, np.nan)
+            if other.dtype == "category" and other.cat.ordered:
+                other = other.cat.codes.replace(-1, np.nan)
 
         this_values = this.to_numpy(dtype=float, na_value=np.nan, copy=False)
         other_values = other.to_numpy(dtype=float, na_value=np.nan, copy=False)
diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py
@@ -11,7 +11,6 @@
 )
 import pandas._testing as tm
 
-
 class TestSeriesCov:
     def test_cov(self, datetime_series):
         # full overlap
@@ -184,3 +183,31 @@ def test_corr_callable_method(self, datetime_series):
         df = pd.DataFrame([s1, s2])
         expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
         tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)
+    
+    @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    def test_corr_rank_ordered_categorical(self, method):
+        # Ordered categoricals must correlate like their integer codes (per scipy).
+        stats = pytest.importorskip("scipy.stats")
+        scipy_corr = {
+            "kendall": stats.kendalltau,
+            "spearman": stats.spearmanr,
+        }[method]
+        levels = ["low", "m", "h", "vh"]
+        ser_ord_cat = pd.Series(
+            pd.Categorical(levels, categories=levels, ordered=True)
+        )
+        ser_ord_cat_codes = ser_ord_cat.cat.codes.replace(-1, np.nan)
+        ser_ord_int = pd.Series([0, 1, 2, 3])
+        ser_ord_float = pd.Series([2.0, 3.0, 4.5, 6.5])
+
+        # (argument passed to Series.corr, equivalent numeric data for scipy)
+        cases = [
+            (ser_ord_int, ser_ord_int),
+            (ser_ord_float, ser_ord_float),
+            (ser_ord_cat, ser_ord_cat_codes),
+        ]
+        for other, other_numeric in cases:
+            corr_calc = ser_ord_cat.corr(other, method=method)
+            corr_expected = scipy_corr(ser_ord_cat_codes, other_numeric)[0]
+            tm.assert_almost_equal(corr_calc, corr_expected)
+        
diff --git a/test_corr.py b/test_corr.py
@@ -0,0 +1,23 @@
+import pandas as pd
+
+# Numeric categories declared in descending order.
+df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [4, 3, 2, 1]})
+df["b"] = df["b"].astype("category").cat.set_categories([4, 3, 2, 1], ordered=True)
+crr = df.corr(method="spearman")
+print(crr)
+
+# String categories declared from "vh" down to "l".
+levels = ["vh", "h", "m", "l"]
+df = pd.DataFrame({"a": [1, 2, 3, 4], "b": levels})
+df["b"] = df["b"].astype("category").cat.set_categories(levels, ordered=True)
+print(df)
+print(df.dtypes)
+crr = df.corr(method="spearman")
+print(crr)
+
+# An ordered categorical Series correlated with itself.
+levels = ["vh", "h", "m", "low"]
+ser_ord_cat = pd.Series(pd.Categorical(levels, categories=levels, ordered=True))
+print(ser_ord_cat)
+crr = ser_ord_cat.corr(ser_ord_cat, method="spearman")
+print(crr)