Skip to content

Commit 5b34838

Browse files
limit change to default str dtype
1 parent ff5d50c commit 5b34838

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

pandas/core/frame.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@
144144
TimedeltaArray,
145145
)
146146
from pandas.core.arrays.sparse import SparseFrameAccessor
147+
from pandas.core.arrays.string_ import StringDtype
147148
from pandas.core.construction import (
148149
ensure_wrapped_if_datetimelike,
149150
sanitize_array,
@@ -5087,7 +5088,12 @@ def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
50875088
and getattr(dtype, "_is_numeric", False)
50885089
and not is_bool_dtype(dtype)
50895090
)
5090-
or (dtype.type is str and np.object_ in dtypes_set)
5091+
# backwards compat for the default `str` dtype being selected by object
5092+
or (
5093+
isinstance(dtype, StringDtype)
5094+
and dtype.na_value is np.nan
5095+
and np.object_ in dtypes_set
5096+
)
50915097
)
50925098

50935099
def predicate(arr: ArrayLike) -> bool:

pandas/tests/frame/methods/test_select_dtypes.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -485,3 +485,27 @@ def test_select_dtypes_no_view(self):
485485
result = df.select_dtypes(include=["number"])
486486
result.iloc[0, 0] = 0
487487
tm.assert_frame_equal(df, df_orig)
488+
489+
def test_select_dtype_object_and_str(self, using_infer_string):
    """Check which string-like columns ``select_dtypes`` picks up.

    The frame holds three columns: "a" built from plain Python strings,
    "b" built from integers, and "c" built explicitly with
    ``dtype="string"``.  Selecting by "object" must return only "a";
    selecting by "string" must always return "c" and additionally "a"
    when ``using_infer_string`` is true.

    Parameters
    ----------
    using_infer_string : bool
        Presumably the pytest fixture reporting whether string inference
        is enabled for the test run (defined outside this view).
    """
    # https://github.com/pandas-dev/pandas/issues/61916
    df = DataFrame(
        {
            "a": ["a", "b", "c"],
            "b": [1, 2, 3],
            "c": pd.array(["a", "b", "c"], dtype="string"),
        }
    )

    # with "object" -> only select the object or default str dtype column
    result = df.select_dtypes(include=["object"])
    expected = df[["a"]]
    tm.assert_frame_equal(result, expected)

    # with "string" -> always select the nullable 'string' column; the
    # default 'str' column "a" is only expected when string inference is on
    result = df.select_dtypes(include=["string"])
    if using_infer_string:
        expected = df[["a", "c"]]
    else:
        expected = df[["c"]]
    # NOTE: no unconditional reassignment of `expected` here -- a leftover
    # `expected = df[["a", "c"]]` would make the else-branch value dead.
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)