Skip to content

Commit b90726f

Browse files
pre commit fixes v2
1 parent e069810 commit b90726f

File tree

5 files changed

+158
-62
lines changed

5 files changed

+158
-62
lines changed

pandas/core/frame.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11670,7 +11670,6 @@ def corr(
1167011670
if method in ("spearman", "kendall"):
1167111671
data = data._transform_ord_cat_cols_to_coded_cols()
1167211672

11673-
1167411673
mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
1167511674

1167611675
if method == "pearson":
@@ -11966,6 +11965,7 @@ def corrwith(
1196611965
elif method in ["kendall", "spearman"] or callable(method):
1196711966
left = left._transform_ord_cat_cols_to_coded_cols()
1196811967
right = right._transform_ord_cat_cols_to_coded_cols()
11968+
1196911969
def c(x):
1197011970
return nanops.nancorr(x[0], x[1], method=method)
1197111971

@@ -11998,8 +11998,8 @@ def c(x):
1199811998

1199911999
def _transform_ord_cat_cols_to_coded_cols(self) -> DataFrame:
1200012000
"""
12001-
any ordered categorical columns are transformed to the respectice caregorical codes
12002-
other columns remain untouched
12001+
any ordered categorical columns are transformed to the respective
12002+
categorical codes while other columns remain untouched
1200312003
"""
1200412004
categ = self.select_dtypes("category")
1200512005
if len(categ.columns) == 0:

pandas/core/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2686,7 +2686,7 @@ def corr(
26862686
this, other = self.align(other, join="inner")
26872687
if len(this) == 0:
26882688
return np.nan
2689-
2689+
26902690
if method in ("spearman", "kendall"):
26912691
if this.dtype == "category" and this.cat.ordered:
26922692
this = this.cat.codes.replace(-1, np.nan)

pandas/tests/frame/methods/test_cov_corr.py

Lines changed: 80 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from itertools import combinations
2+
23
import numpy as np
34
import pytest
45

@@ -252,24 +253,45 @@ def test_corr_numeric_only(self, meth, numeric_only):
252253
else:
253254
with pytest.raises(ValueError, match="could not convert string to float"):
254255
df.corr(meth, numeric_only=numeric_only)
255-
256+
256257
@pytest.mark.parametrize("method", ["kendall", "spearman"])
257-
def test_corr_rank_ordered_categorical(self, method,):
258+
def test_corr_rank_ordered_categorical(
259+
self,
260+
method,
261+
):
258262
df = DataFrame(
259263
{
260-
"ord_cat": pd.Series(pd.Categorical(["low", "m", "h", "vh"], categories=["low", "m", "h", "vh"], ordered=True)),
261-
"ord_cat_none": pd.Series(pd.Categorical(["low", "m", "h", None], categories=["low", "m", "h"], ordered=True)),
262-
"ord_int": pd.Series([0, 1, 2, 3]),
263-
"ord_float": pd.Series([2.0, 3.0, 4.5, 6.5]),
264-
"ord_float_nan": pd.Series([2.0, 3.0, 4.5, np.nan]),
265-
"ord_cat_shuff": pd.Series(pd.Categorical(["m", "h", "vh", "low"], categories=["low", "m", "h", "vh"], ordered=True)),
264+
"ord_cat": Series(
265+
pd.Categorical(
266+
["low", "m", "h", "vh"],
267+
categories=["low", "m", "h", "vh"],
268+
ordered=True,
269+
)
270+
),
271+
"ord_cat_none": Series(
272+
pd.Categorical(
273+
["low", "m", "h", None],
274+
categories=["low", "m", "h"],
275+
ordered=True,
276+
)
277+
),
278+
"ord_int": Series([0, 1, 2, 3]),
279+
"ord_float": Series([2.0, 3.0, 4.5, 6.5]),
280+
"ord_float_nan": Series([2.0, 3.0, 4.5, np.nan]),
281+
"ord_cat_shuff": Series(
282+
pd.Categorical(
283+
["m", "h", "vh", "low"],
284+
categories=["low", "m", "h", "vh"],
285+
ordered=True,
286+
)
287+
),
288+
"ord_int_shuff": Series([2, 3, 0, 1]),
266289
}
267290
)
268291
corr_calc = df.corr(method=method)
269-
for col1, col2 in combinations(["ord_cat", "ord_int", "ord_float"], r=2):
270-
expected = df[col1].corr(df[col2], method=method)
271-
tm.assert_almost_equal(corr_calc[col1][col2], expected)
272-
292+
for col1, col2 in combinations(df.columns, r=2):
293+
corr_expected = df[col1].corr(df[col2], method=method)
294+
tm.assert_almost_equal(corr_calc[col1][col2], corr_expected)
273295

274296

275297
class TestDataFrameCorrWith:
@@ -512,3 +534,49 @@ def test_cov_with_missing_values(self):
512534
result2 = df.dropna().cov()
513535
tm.assert_frame_equal(result1, expected)
514536
tm.assert_frame_equal(result2, expected)
537+
538+
@pytest.mark.parametrize("method", ["kendall", "spearman"])
539+
def test_corr_rank_ordered_categorical(
540+
self,
541+
method,
542+
):
543+
df1 = DataFrame(
544+
{
545+
"a": Series(
546+
pd.Categorical(
547+
["low", "m", "h", "vh"],
548+
categories=["low", "m", "h", "vh"],
549+
ordered=True,
550+
)
551+
),
552+
"b": Series(
553+
pd.Categorical(
554+
["low", "m", "h", None],
555+
categories=["low", "m", "h"],
556+
ordered=True,
557+
)
558+
),
559+
"c": Series([0, 1, 2, 3]),
560+
"d": Series([2.0, 3.0, 4.5, 6.5]),
561+
}
562+
)
563+
564+
df2 = DataFrame(
565+
{
566+
"a": Series([2.0, 3.0, 4.5, np.nan]),
567+
"b": Series(
568+
pd.Categorical(
569+
["m", "h", "vh", "low"],
570+
categories=["low", "m", "h", "vh"],
571+
ordered=True,
572+
)
573+
),
574+
"c": Series([2, 3, 0, 1]),
575+
"d": Series([2.0, 3.0, 4.5, 6.5]),
576+
}
577+
)
578+
579+
corr_calc = df1.corrwith(df2, method=method)
580+
for col in df1.columns:
581+
corr_expected = df1[col].corr(df2[col], method=method)
582+
tm.assert_almost_equal(corr_calc.get(col), corr_expected)

pandas/tests/series/methods/test_cov_corr.py

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
)
1212
import pandas._testing as tm
1313

14+
1415
class TestSeriesCov:
1516
def test_cov(self, datetime_series):
1617
# full overlap
@@ -183,54 +184,77 @@ def test_corr_callable_method(self, datetime_series):
183184
df = pd.DataFrame([s1, s2])
184185
expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
185186
tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)
186-
187+
187188
@pytest.mark.parametrize("method", ["kendall", "spearman"])
188-
def test_corr_rank_ordered_categorical(self, method,):
189+
def test_corr_rank_ordered_categorical(
190+
self,
191+
method,
192+
):
189193
stats = pytest.importorskip("scipy.stats")
190-
method_scipy_func = {
191-
"kendall": stats.kendalltau,
192-
"spearman": stats.spearmanr
193-
}
194-
ser_ord_cat = pd.Series( pd.Categorical(
195-
["low", "med", "high", "very_high"],
196-
categories=["low", "med", "high", "very_high"], ordered=True
197-
))
194+
method_scipy_func = {"kendall": stats.kendalltau, "spearman": stats.spearmanr}
195+
ser_ord_cat = Series(
196+
pd.Categorical(
197+
["low", "med", "high", "very_high"],
198+
categories=["low", "med", "high", "very_high"],
199+
ordered=True,
200+
)
201+
)
198202
ser_ord_cat_codes = ser_ord_cat.cat.codes.replace(-1, np.nan)
199-
ser_ord_int = pd.Series([0, 1, 2, 3])
200-
ser_ord_float = pd.Series([2.0, 3.0, 4.5, 6.5])
201-
203+
ser_ord_int = Series([0, 1, 2, 3])
204+
ser_ord_float = Series([2.0, 3.0, 4.5, 6.5])
205+
202206
corr_calc = ser_ord_cat.corr(ser_ord_int, method=method)
203-
corr_expected = method_scipy_func[method](ser_ord_cat_codes, ser_ord_int, nan_policy="omit")[0]
207+
corr_expected = method_scipy_func[method](
208+
ser_ord_cat_codes, ser_ord_int, nan_policy="omit"
209+
)[0]
204210
tm.assert_almost_equal(corr_calc, corr_expected)
205211

206212
corr_calc = ser_ord_cat.corr(ser_ord_float, method=method)
207-
corr_expected = method_scipy_func[method](ser_ord_cat_codes, ser_ord_float, nan_policy="omit")[0]
213+
corr_expected = method_scipy_func[method](
214+
ser_ord_cat_codes, ser_ord_float, nan_policy="omit"
215+
)[0]
208216
tm.assert_almost_equal(corr_calc, corr_expected)
209217

210218
corr_calc = ser_ord_cat.corr(ser_ord_cat, method=method)
211-
corr_expected = method_scipy_func[method](ser_ord_cat_codes, ser_ord_cat_codes, nan_policy="omit")[0]
219+
corr_expected = method_scipy_func[method](
220+
ser_ord_cat_codes, ser_ord_cat_codes, nan_policy="omit"
221+
)[0]
212222
tm.assert_almost_equal(corr_calc, corr_expected)
213223

214-
ser_ord_cat_shuff = pd.Series( pd.Categorical(
215-
["high", "low", "very_high", "med"],
216-
categories=["low", "med", "high", "very_high"], ordered=True
217-
))
224+
ser_ord_cat_shuff = Series(
225+
pd.Categorical(
226+
["high", "low", "very_high", "med"],
227+
categories=["low", "med", "high", "very_high"],
228+
ordered=True,
229+
)
230+
)
218231
ser_ord_cat_shuff_codes = ser_ord_cat_shuff.cat.codes.replace(-1, np.nan)
219-
232+
220233
corr_calc = ser_ord_cat_shuff.corr(ser_ord_cat, method=method)
221-
corr_expected = method_scipy_func[method](ser_ord_cat_shuff_codes, ser_ord_cat_codes, nan_policy="omit")[0]
234+
corr_expected = method_scipy_func[method](
235+
ser_ord_cat_shuff_codes, ser_ord_cat_codes, nan_policy="omit"
236+
)[0]
222237
tm.assert_almost_equal(corr_calc, corr_expected)
223238

224239
corr_calc = ser_ord_cat_shuff.corr(ser_ord_cat_shuff, method=method)
225-
corr_expected = method_scipy_func[method](ser_ord_cat_shuff_codes, ser_ord_cat_shuff_codes, nan_policy="omit")[0]
240+
corr_expected = method_scipy_func[method](
241+
ser_ord_cat_shuff_codes, ser_ord_cat_shuff_codes, nan_policy="omit"
242+
)[0]
226243
tm.assert_almost_equal(corr_calc, corr_expected)
227-
228-
ser_ord_cat_with_nan = pd.Series( pd.Categorical(
229-
["h", "low", "vh", None, "m"],
230-
categories=["low", "m", "h", "vh"], ordered=True
231-
))
232-
ser_ord_cat_shuff_with_nan_codes = ser_ord_cat_with_nan.cat.codes.replace(-1, np.nan)
233-
ser_ord_int = pd.Series([2, 0, 1, 3, None])
244+
245+
ser_ord_cat_with_nan = Series(
246+
pd.Categorical(
247+
["h", "low", "vh", None, "m"],
248+
categories=["low", "m", "h", "vh"],
249+
ordered=True,
250+
)
251+
)
252+
ser_ord_cat_shuff_with_nan_codes = ser_ord_cat_with_nan.cat.codes.replace(
253+
-1, np.nan
254+
)
255+
ser_ord_int = Series([2, 0, 1, 3, None])
234256
corr_calc = ser_ord_cat_with_nan.corr(ser_ord_int, method=method)
235-
corr_expected = method_scipy_func[method](ser_ord_cat_shuff_with_nan_codes, ser_ord_int, nan_policy="omit")[0]
236-
tm.assert_almost_equal(corr_calc, corr_expected)
257+
corr_expected = method_scipy_func[method](
258+
ser_ord_cat_shuff_with_nan_codes, ser_ord_int, nan_policy="omit"
259+
)[0]
260+
tm.assert_almost_equal(corr_calc, corr_expected)

test_corr.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,27 @@
11
import pandas as pd
2-
df = pd.DataFrame({'a' : [1, 2, 3, 4], 'b' : [4, 3, 2, 1]})
3-
df['b'] = df['b'].astype('category').cat.set_categories([4, 3, 2, 1], ordered=True)
4-
#import pdb; pdb.set_trace()
5-
crr = df.corr(method='spearman')
2+
3+
df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [4, 3, 2, 1]})
4+
df["b"] = df["b"].astype("category").cat.set_categories([4, 3, 2, 1], ordered=True)
5+
# import pdb; pdb.set_trace()
6+
crr = df.corr(method="spearman")
67
print(crr)
78

89

9-
df = pd.DataFrame({'a' : [1, 2, 3, 4], 'b' : ["vh", "h", "m", "l"]})
10-
df['b'] = df['b'].astype('category').cat.set_categories(["vh", "h", "m", "l"], ordered=True)
11-
#import pdb; pdb.set_trace()
10+
df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["vh", "h", "m", "l"]})
11+
df["b"] = (
12+
df["b"].astype("category").cat.set_categories(["vh", "h", "m", "l"], ordered=True)
13+
)
14+
# import pdb; pdb.set_trace()
1215
print(df)
1316
print(df.dtypes)
14-
crr = df.corr(method='spearman')
17+
crr = df.corr(method="spearman")
1518
print(crr)
1619

17-
ser_ord_cat = pd.Series( pd.Categorical(
18-
["vh", "h", "m", "low"],
19-
categories=["vh", "h", "m", "low"], ordered=True
20-
))
20+
ser_ord_cat = pd.Series(
21+
pd.Categorical(
22+
["vh", "h", "m", "low"], categories=["vh", "h", "m", "low"], ordered=True
23+
)
24+
)
2125
print(ser_ord_cat)
22-
crr = ser_ord_cat.corr(ser_ord_cat, method='spearman')
23-
print(crr)
26+
crr = ser_ord_cat.corr(ser_ord_cat, method="spearman")
27+
print(crr)

0 commit comments

Comments
 (0)