11import numpy as np
22import pytest
33
4+ from pandas .errors import Pandas4Warning
5+
46import pandas as pd
57from pandas import DataFrame
68import pandas ._testing as tm
79
810
9- class TestDataFrameFilter :
10- def test_filter (self , float_frame , float_string_frame ):
11+ class TestDataFrameSelect :
12+ def test_select (self , float_frame , float_string_frame ):
1113 # Items
12- filtered = float_frame .filter (["A" , "B" , "E" ])
13- assert len (filtered .columns ) == 2
14- assert "E" not in filtered
14+ selected = float_frame .select (["A" , "B" , "E" ])
15+ assert len (selected .columns ) == 2
16+ assert "E" not in selected
1517
16- filtered = float_frame .filter (["A" , "B" , "E" ], axis = "columns" )
17- assert len (filtered .columns ) == 2
18- assert "E" not in filtered
18+ selected = float_frame .select (["A" , "B" , "E" ], axis = "columns" )
19+ assert len (selected .columns ) == 2
20+ assert "E" not in selected
1921
2022 # Other axis
2123 idx = float_frame .index [0 :4 ]
22- filtered = float_frame .filter (idx , axis = "index" )
24+ selected = float_frame .select (idx , axis = "index" )
2325 expected = float_frame .reindex (index = idx )
24- tm .assert_frame_equal (filtered , expected )
26+ tm .assert_frame_equal (selected , expected )
2527
2628 # like
2729 fcopy = float_frame .copy ()
2830 fcopy ["AA" ] = 1
2931
30- filtered = fcopy .filter (like = "A" )
31- assert len (filtered .columns ) == 2
32- assert "AA" in filtered
32+ selected = fcopy .select (like = "A" )
33+ assert len (selected .columns ) == 2
34+ assert "AA" in selected
3335
3436 # like with ints in column names
3537 df = DataFrame (0.0 , index = [0 , 1 , 2 ], columns = [0 , 1 , "_A" , "_B" ])
36- filtered = df .filter (like = "_" )
37- assert len (filtered .columns ) == 2
38+ selected = df .select (like = "_" )
39+ assert len (selected .columns ) == 2
3840
3941 # regex with ints in column names
4042 # from PR #10384
4143 df = DataFrame (0.0 , index = [0 , 1 , 2 ], columns = ["A1" , 1 , "B" , 2 , "C" ])
4244 expected = DataFrame (
4345 0.0 , index = [0 , 1 , 2 ], columns = pd .Index ([1 , 2 ], dtype = object )
4446 )
45- filtered = df .filter (regex = "^[0-9]+$" )
46- tm .assert_frame_equal (filtered , expected )
47+ selected = df .select (regex = "^[0-9]+$" )
48+ tm .assert_frame_equal (selected , expected )
4749
4850 expected = DataFrame (0.0 , index = [0 , 1 , 2 ], columns = [0 , "0" , 1 , "1" ])
4951 # shouldn't remove anything
50- filtered = expected .filter (regex = "^[0-9]+$" )
51- tm .assert_frame_equal (filtered , expected )
52+ selected = expected .select (regex = "^[0-9]+$" )
53+ tm .assert_frame_equal (selected , expected )
5254
5355 # pass in None
5456 with pytest .raises (TypeError , match = "Must pass" ):
55- float_frame .filter ()
57+ float_frame .select ()
5658 with pytest .raises (TypeError , match = "Must pass" ):
57- float_frame .filter (items = None )
59+ float_frame .select (items = None )
5860 with pytest .raises (TypeError , match = "Must pass" ):
59- float_frame .filter (axis = 1 )
61+ float_frame .select (axis = 1 )
6062
6163 # test mutually exclusive arguments
6264 with pytest .raises (TypeError , match = "mutually exclusive" ):
63- float_frame .filter (items = ["one" , "three" ], regex = "e$" , like = "bbi" )
65+ float_frame .select (items = ["one" , "three" ], regex = "e$" , like = "bbi" )
6466 with pytest .raises (TypeError , match = "mutually exclusive" ):
65- float_frame .filter (items = ["one" , "three" ], regex = "e$" , axis = 1 )
67+ float_frame .select (items = ["one" , "three" ], regex = "e$" , axis = 1 )
6668 with pytest .raises (TypeError , match = "mutually exclusive" ):
67- float_frame .filter (items = ["one" , "three" ], regex = "e$" )
69+ float_frame .select (items = ["one" , "three" ], regex = "e$" )
6870 with pytest .raises (TypeError , match = "mutually exclusive" ):
69- float_frame .filter (items = ["one" , "three" ], like = "bbi" , axis = 0 )
71+ float_frame .select (items = ["one" , "three" ], like = "bbi" , axis = 0 )
7072 with pytest .raises (TypeError , match = "mutually exclusive" ):
71- float_frame .filter (items = ["one" , "three" ], like = "bbi" )
73+ float_frame .select (items = ["one" , "three" ], like = "bbi" )
7274
7375 # objects
74- filtered = float_string_frame .filter (like = "foo" )
75- assert "foo" in filtered
76+ selected = float_string_frame .select (like = "foo" )
77+ assert "foo" in selected
7678
7779 # unicode columns, won't ascii-encode
7880 df = float_frame .rename (columns = {"B" : "\u2202 " })
79- filtered = df .filter (like = "C" )
80- assert "C" in filtered
81+ selected = df .select (like = "C" )
82+ assert "C" in selected
8183
82- def test_filter_regex_search (self , float_frame ):
84+ def test_select_regex_search (self , float_frame ):
8385 fcopy = float_frame .copy ()
8486 fcopy ["AA" ] = 1
8587
8688 # regex
87- filtered = fcopy .filter (regex = "[A]+" )
88- assert len (filtered .columns ) == 2
89- assert "AA" in filtered
89+ selected = fcopy .select (regex = "[A]+" )
90+ assert len (selected .columns ) == 2
91+ assert "AA" in selected
9092
9193 # doesn't have to be at beginning
9294 df = DataFrame (
9395 {"aBBa" : [1 , 2 ], "BBaBB" : [1 , 2 ], "aCCa" : [1 , 2 ], "aCCaBB" : [1 , 2 ]}
9496 )
9597
96- result = df .filter (regex = "BB" )
98+ result = df .select (regex = "BB" )
9799 exp = df [[x for x in df .columns if "BB" in x ]]
98100 tm .assert_frame_equal (result , exp )
99101
@@ -104,50 +106,62 @@ def test_filter_regex_search(self, float_frame):
104106 ("あ" , {"あ" : [3 , 4 ]}),
105107 ],
106108 )
107- def test_filter_unicode (self , name , expected_data ):
109+ def test_select_unicode (self , name , expected_data ):
108110 # GH13101
109111 df = DataFrame ({"a" : [1 , 2 ], "あ" : [3 , 4 ]})
110112 expected = DataFrame (expected_data )
111113
112- tm .assert_frame_equal (df .filter (like = name ), expected )
113- tm .assert_frame_equal (df .filter (regex = name ), expected )
114+ tm .assert_frame_equal (df .select (like = name ), expected )
115+ tm .assert_frame_equal (df .select (regex = name ), expected )
114116
115- def test_filter_bytestring (self ):
117+ def test_select_bytestring (self ):
116118 # GH13101
117119 name = "a"
118120 df = DataFrame ({b"a" : [1 , 2 ], b"b" : [3 , 4 ]})
119121 expected = DataFrame ({b"a" : [1 , 2 ]})
120122
121- tm .assert_frame_equal (df .filter (like = name ), expected )
122- tm .assert_frame_equal (df .filter (regex = name ), expected )
123+ tm .assert_frame_equal (df .select (like = name ), expected )
124+ tm .assert_frame_equal (df .select (regex = name ), expected )
123125
124- def test_filter_corner (self ):
126+ def test_select_corner (self ):
125127 empty = DataFrame ()
126128
127- result = empty .filter ([])
129+ result = empty .select ([])
128130 tm .assert_frame_equal (result , empty )
129131
130- result = empty .filter (like = "foo" )
132+ result = empty .select (like = "foo" )
131133 tm .assert_frame_equal (result , empty )
132134
133- def test_filter_regex_non_string (self ):
134- # GH#5798 trying to filter on non-string columns should drop,
135+ def test_select_regex_non_string (self ):
136+ # GH#5798 trying to select on non-string columns should drop,
135137 # not raise
136138 df = DataFrame (np .random .default_rng (2 ).random ((3 , 2 )), columns = ["STRING" , 123 ])
137- result = df .filter (regex = "STRING" )
139+ result = df .select (regex = "STRING" )
138140 expected = df [["STRING" ]]
139141 tm .assert_frame_equal (result , expected )
140142
141- def test_filter_keep_order (self ):
143+ def test_select_keep_order (self ):
142144 # GH#54980
143145 df = DataFrame ({"A" : [1 , 2 , 3 ], "B" : [4 , 5 , 6 ]})
144- result = df .filter (items = ["B" , "A" ])
146+ result = df .select (items = ["B" , "A" ])
145147 expected = df [["B" , "A" ]]
146148 tm .assert_frame_equal (result , expected )
147149
148- def test_filter_different_dtype (self ):
150+ def test_select_different_dtype (self ):
149151 # GH#54980
150152 df = DataFrame ({1 : [1 , 2 , 3 ], 2 : [4 , 5 , 6 ]})
151- result = df .filter (items = ["B" , "A" ])
153+ result = df .select (items = ["B" , "A" ])
152154 expected = df [[]]
153155 tm .assert_frame_equal (result , expected )
156+
157+ def test_filter_deprecated (self ):
158+ # GH#26642
159+ df = DataFrame ({1 : [1 , 2 , 3 ], 2 : [4 , 5 , 6 ]})
160+ msg = "DataFrame.filter is deprecated"
161+ with tm .assert_produces_warning (Pandas4Warning , match = msg ):
162+ df .filter (items = ["B" , "A" ])
163+
164+ ser = df [1 ]
165+ msg = "Series.filter is deprecated"
166+ with tm .assert_produces_warning (Pandas4Warning , match = msg ):
167+ ser .filter ([0 , 1 ])
0 commit comments