Skip to content

Commit bea5166

Browse files
committed
DEPR: rename filter->select
1 parent 56a85b2 commit bea5166

File tree

4 files changed

+112
-64
lines changed

4 files changed

+112
-64
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,7 @@ Other Deprecations
715715
- Deprecated the ``arg`` parameter of ``Series.map``; pass the added ``func`` argument instead. (:issue:`61260`)
716716
- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
717717
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`)
718+
- Deprecated :meth:`Series.filter` and :meth:`DataFrame.filter`, which have been renamed to ``select``; use ``Series.select`` and ``DataFrame.select`` instead (:issue:`26642`)
718719
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
719720
- Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
720721

pandas/core/generic.py

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5520,7 +5520,8 @@ def _reindex_with_indexers(
55205520
self
55215521
)
55225522

5523-
def filter(
5523+
@final
5524+
def select(
55245525
self,
55255526
items=None,
55265527
like: str | None = None,
@@ -5530,9 +5531,9 @@ def filter(
55305531
"""
55315532
Subset the DataFrame or Series according to the specified index labels.
55325533
5533-
For DataFrame, filter rows or columns depending on ``axis`` argument.
5534-
Note that this routine does not filter based on content.
5535-
The filter is applied to the labels of the index.
5534+
For DataFrame, select rows or columns depending on ``axis`` argument.
5535+
Note that this routine does not select based on content.
5536+
The selection is applied to the labels of the index.
55365537
55375538
Parameters
55385539
----------
@@ -5551,7 +5552,7 @@ def filter(
55515552
Returns
55525553
-------
55535554
Same type as caller
5554-
The filtered subset of the DataFrame or Series.
5555+
The selected subset of the DataFrame or Series.
55555556
55565557
See Also
55575558
--------
@@ -5579,22 +5580,54 @@ def filter(
55795580
rabbit 4 5 6
55805581
55815582
>>> # select columns by name
5582-
>>> df.filter(items=["one", "three"])
5583+
>>> df.select(items=["one", "three"])
55835584
one three
55845585
mouse 1 3
55855586
rabbit 4 6
55865587
55875588
>>> # select columns by regular expression
5588-
>>> df.filter(regex="e$", axis=1)
5589+
>>> df.select(regex="e$", axis=1)
55895590
one three
55905591
mouse 1 3
55915592
rabbit 4 6
55925593
55935594
>>> # select rows containing 'bbi'
5594-
>>> df.filter(like="bbi", axis=0)
5595+
>>> df.select(like="bbi", axis=0)
55955596
one two three
55965597
rabbit 4 5 6
55975598
"""
5599+
5600+
return self._filter(items=items, like=like, regex=regex, axis=axis)
5601+
5602+
@final
5603+
def filter(
5604+
self,
5605+
items=None,
5606+
like: str | None = None,
5607+
regex: str | None = None,
5608+
axis: Axis | None = None,
5609+
) -> Self:
5610+
"""
5611+
Use obj.select instead.
5612+
5613+
.. deprecated:: 3.0.0
5614+
"""
5615+
warnings.warn(
5616+
f"{type(self).__name__}.filter is deprecated and will be removed "
5617+
"in a future version. Use obj.select instead.",
5618+
Pandas4Warning,
5619+
stacklevel=find_stack_level(),
5620+
)
5621+
return self._filter(items=items, like=like, regex=regex, axis=axis)
5622+
5623+
@final
5624+
def _filter(
5625+
self,
5626+
items=None,
5627+
like: str | None = None,
5628+
regex: str | None = None,
5629+
axis: Axis | None = None,
5630+
) -> Self:
55985631
nkw = common.count_not_none(items, like, regex)
55995632
if nkw > 1:
56005633
raise TypeError(

pandas/tests/copy_view/test_methods.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -365,14 +365,14 @@ def test_select_dtypes():
365365

366366

367367
@pytest.mark.parametrize(
368-
"filter_kwargs", [{"items": ["a"]}, {"like": "a"}, {"regex": "a"}]
368+
"select_kwargs", [{"items": ["a"]}, {"like": "a"}, {"regex": "a"}]
369369
)
370-
def test_filter(filter_kwargs):
371-
# Case: selecting columns using `filter()` returns a new dataframe
370+
def test_select(select_kwargs):
371+
# Case: selecting columns using `select()` returns a new dataframe
372372
# + afterwards modifying the result
373373
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
374374
df_orig = df.copy()
375-
df2 = df.filter(**filter_kwargs)
375+
df2 = df.select(**select_kwargs)
376376
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
377377

378378
# mutating df2 triggers a copy-on-write for that column/block
Lines changed: 66 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,101 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.errors import Pandas4Warning
5+
46
import pandas as pd
57
from pandas import DataFrame
68
import pandas._testing as tm
79

810

9-
class TestDataFrameFilter:
10-
def test_filter(self, float_frame, float_string_frame):
11+
class TestDataFrameSelect:
12+
def test_select(self, float_frame, float_string_frame):
1113
# Items
12-
filtered = float_frame.filter(["A", "B", "E"])
13-
assert len(filtered.columns) == 2
14-
assert "E" not in filtered
14+
selected = float_frame.select(["A", "B", "E"])
15+
assert len(selected.columns) == 2
16+
assert "E" not in selected
1517

16-
filtered = float_frame.filter(["A", "B", "E"], axis="columns")
17-
assert len(filtered.columns) == 2
18-
assert "E" not in filtered
18+
selected = float_frame.select(["A", "B", "E"], axis="columns")
19+
assert len(selected.columns) == 2
20+
assert "E" not in selected
1921

2022
# Other axis
2123
idx = float_frame.index[0:4]
22-
filtered = float_frame.filter(idx, axis="index")
24+
selected = float_frame.select(idx, axis="index")
2325
expected = float_frame.reindex(index=idx)
24-
tm.assert_frame_equal(filtered, expected)
26+
tm.assert_frame_equal(selected, expected)
2527

2628
# like
2729
fcopy = float_frame.copy()
2830
fcopy["AA"] = 1
2931

30-
filtered = fcopy.filter(like="A")
31-
assert len(filtered.columns) == 2
32-
assert "AA" in filtered
32+
selected = fcopy.select(like="A")
33+
assert len(selected.columns) == 2
34+
assert "AA" in selected
3335

3436
# like with ints in column names
3537
df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"])
36-
filtered = df.filter(like="_")
37-
assert len(filtered.columns) == 2
38+
selected = df.select(like="_")
39+
assert len(selected.columns) == 2
3840

3941
# regex with ints in column names
4042
# from PR #10384
4143
df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"])
4244
expected = DataFrame(
4345
0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)
4446
)
45-
filtered = df.filter(regex="^[0-9]+$")
46-
tm.assert_frame_equal(filtered, expected)
47+
selected = df.select(regex="^[0-9]+$")
48+
tm.assert_frame_equal(selected, expected)
4749

4850
expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"])
4951
# shouldn't remove anything
50-
filtered = expected.filter(regex="^[0-9]+$")
51-
tm.assert_frame_equal(filtered, expected)
52+
selected = expected.select(regex="^[0-9]+$")
53+
tm.assert_frame_equal(selected, expected)
5254

5355
# pass in None
5456
with pytest.raises(TypeError, match="Must pass"):
55-
float_frame.filter()
57+
float_frame.select()
5658
with pytest.raises(TypeError, match="Must pass"):
57-
float_frame.filter(items=None)
59+
float_frame.select(items=None)
5860
with pytest.raises(TypeError, match="Must pass"):
59-
float_frame.filter(axis=1)
61+
float_frame.select(axis=1)
6062

6163
# test mutually exclusive arguments
6264
with pytest.raises(TypeError, match="mutually exclusive"):
63-
float_frame.filter(items=["one", "three"], regex="e$", like="bbi")
65+
float_frame.select(items=["one", "three"], regex="e$", like="bbi")
6466
with pytest.raises(TypeError, match="mutually exclusive"):
65-
float_frame.filter(items=["one", "three"], regex="e$", axis=1)
67+
float_frame.select(items=["one", "three"], regex="e$", axis=1)
6668
with pytest.raises(TypeError, match="mutually exclusive"):
67-
float_frame.filter(items=["one", "three"], regex="e$")
69+
float_frame.select(items=["one", "three"], regex="e$")
6870
with pytest.raises(TypeError, match="mutually exclusive"):
69-
float_frame.filter(items=["one", "three"], like="bbi", axis=0)
71+
float_frame.select(items=["one", "three"], like="bbi", axis=0)
7072
with pytest.raises(TypeError, match="mutually exclusive"):
71-
float_frame.filter(items=["one", "three"], like="bbi")
73+
float_frame.select(items=["one", "three"], like="bbi")
7274

7375
# objects
74-
filtered = float_string_frame.filter(like="foo")
75-
assert "foo" in filtered
76+
selected = float_string_frame.select(like="foo")
77+
assert "foo" in selected
7678

7779
# unicode columns, won't ascii-encode
7880
df = float_frame.rename(columns={"B": "\u2202"})
79-
filtered = df.filter(like="C")
80-
assert "C" in filtered
81+
selected = df.select(like="C")
82+
assert "C" in selected
8183

82-
def test_filter_regex_search(self, float_frame):
84+
def test_select_regex_search(self, float_frame):
8385
fcopy = float_frame.copy()
8486
fcopy["AA"] = 1
8587

8688
# regex
87-
filtered = fcopy.filter(regex="[A]+")
88-
assert len(filtered.columns) == 2
89-
assert "AA" in filtered
89+
selected = fcopy.select(regex="[A]+")
90+
assert len(selected.columns) == 2
91+
assert "AA" in selected
9092

9193
# doesn't have to be at beginning
9294
df = DataFrame(
9395
{"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]}
9496
)
9597

96-
result = df.filter(regex="BB")
98+
result = df.select(regex="BB")
9799
exp = df[[x for x in df.columns if "BB" in x]]
98100
tm.assert_frame_equal(result, exp)
99101

@@ -104,50 +106,62 @@ def test_filter_regex_search(self, float_frame):
104106
("あ", {"あ": [3, 4]}),
105107
],
106108
)
107-
def test_filter_unicode(self, name, expected_data):
109+
def test_select_unicode(self, name, expected_data):
108110
# GH13101
109111
df = DataFrame({"a": [1, 2], "あ": [3, 4]})
110112
expected = DataFrame(expected_data)
111113

112-
tm.assert_frame_equal(df.filter(like=name), expected)
113-
tm.assert_frame_equal(df.filter(regex=name), expected)
114+
tm.assert_frame_equal(df.select(like=name), expected)
115+
tm.assert_frame_equal(df.select(regex=name), expected)
114116

115-
def test_filter_bytestring(self):
117+
def test_select_bytestring(self):
116118
# GH13101
117119
name = "a"
118120
df = DataFrame({b"a": [1, 2], b"b": [3, 4]})
119121
expected = DataFrame({b"a": [1, 2]})
120122

121-
tm.assert_frame_equal(df.filter(like=name), expected)
122-
tm.assert_frame_equal(df.filter(regex=name), expected)
123+
tm.assert_frame_equal(df.select(like=name), expected)
124+
tm.assert_frame_equal(df.select(regex=name), expected)
123125

124-
def test_filter_corner(self):
126+
def test_select_corner(self):
125127
empty = DataFrame()
126128

127-
result = empty.filter([])
129+
result = empty.select([])
128130
tm.assert_frame_equal(result, empty)
129131

130-
result = empty.filter(like="foo")
132+
result = empty.select(like="foo")
131133
tm.assert_frame_equal(result, empty)
132134

133-
def test_filter_regex_non_string(self):
134-
# GH#5798 trying to filter on non-string columns should drop,
135+
def test_select_regex_non_string(self):
136+
# GH#5798 trying to select on non-string columns should drop,
135137
# not raise
136138
df = DataFrame(np.random.default_rng(2).random((3, 2)), columns=["STRING", 123])
137-
result = df.filter(regex="STRING")
139+
result = df.select(regex="STRING")
138140
expected = df[["STRING"]]
139141
tm.assert_frame_equal(result, expected)
140142

141-
def test_filter_keep_order(self):
143+
def test_select_keep_order(self):
142144
# GH#54980
143145
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
144-
result = df.filter(items=["B", "A"])
146+
result = df.select(items=["B", "A"])
145147
expected = df[["B", "A"]]
146148
tm.assert_frame_equal(result, expected)
147149

148-
def test_filter_different_dtype(self):
150+
def test_select_different_dtype(self):
149151
# GH#54980
150152
df = DataFrame({1: [1, 2, 3], 2: [4, 5, 6]})
151-
result = df.filter(items=["B", "A"])
153+
result = df.select(items=["B", "A"])
152154
expected = df[[]]
153155
tm.assert_frame_equal(result, expected)
156+
157+
def test_filter_deprecated(self):
158+
# GH#26642
159+
df = DataFrame({1: [1, 2, 3], 2: [4, 5, 6]})
160+
msg = "DataFrame.filter is deprecated"
161+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
162+
df.filter(items=["B", "A"])
163+
164+
ser = df[1]
165+
msg = "Series.filter is deprecated"
166+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
167+
ser.filter([0, 1])

0 commit comments

Comments
 (0)