11"""
2- Tests that work on both the Python and C engines but do not have a
2+ Tests that work on the Python, C and PyArrow engines but do not have a
33specific classification into the other test modules.
44"""
55import codecs
2121from pandas import DataFrame
2222import pandas ._testing as tm
2323
24- pytestmark = pytest .mark .usefixtures ("pyarrow_skip" )
24+ xfail_pyarrow = pytest .mark .usefixtures ("pyarrow_xfail" )
25+ skip_pyarrow = pytest .mark .usefixtures ("pyarrow_skip" )
2526
2627
2728def test_empty_decimal_marker (all_parsers ):
@@ -33,10 +34,17 @@ def test_empty_decimal_marker(all_parsers):
3334 msg = "Only length-1 decimal markers supported"
3435 parser = all_parsers
3536
37+ if parser .engine == "pyarrow" :
38+ msg = (
39+ "only single character unicode strings can be "
40+ "converted to Py_UCS4, got length 0"
41+ )
42+
3643 with pytest .raises (ValueError , match = msg ):
3744 parser .read_csv (StringIO (data ), decimal = "" )
3845
3946
47+ @skip_pyarrow
4048def test_bad_stream_exception (all_parsers , csv_dir_path ):
4149 # see gh-13652
4250 #
@@ -57,6 +65,7 @@ def test_bad_stream_exception(all_parsers, csv_dir_path):
5765 parser .read_csv (stream )
5866
5967
68+ @skip_pyarrow
6069def test_malformed (all_parsers ):
6170 # see gh-6607
6271 parser = all_parsers
@@ -71,6 +80,7 @@ def test_malformed(all_parsers):
7180 parser .read_csv (StringIO (data ), header = 1 , comment = "#" )
7281
7382
83+ @skip_pyarrow
7484@pytest .mark .parametrize ("nrows" , [5 , 3 , None ])
7585def test_malformed_chunks (all_parsers , nrows ):
7686 data = """ignore
@@ -90,6 +100,7 @@ def test_malformed_chunks(all_parsers, nrows):
90100 reader .read (nrows )
91101
92102
103+ @skip_pyarrow
93104def test_catch_too_many_names (all_parsers ):
94105 # see gh-5156
95106 data = """\
@@ -109,6 +120,7 @@ def test_catch_too_many_names(all_parsers):
109120 parser .read_csv (StringIO (data ), header = 0 , names = ["a" , "b" , "c" , "d" ])
110121
111122
123+ @skip_pyarrow
112124@pytest .mark .parametrize ("nrows" , [0 , 1 , 2 , 3 , 4 , 5 ])
113125def test_raise_on_no_columns (all_parsers , nrows ):
114126 parser = all_parsers
@@ -147,6 +159,10 @@ def test_error_bad_lines(all_parsers):
147159 data = "a\n 1\n 1,2,3\n 4\n 5,6,7"
148160
149161 msg = "Expected 1 fields in line 3, saw 3"
162+
163+ if parser .engine == "pyarrow" :
164+ msg = "CSV parse error: Expected 1 columns, got 3: 1,2,3"
165+
150166 with pytest .raises (ParserError , match = msg ):
151167 parser .read_csv (StringIO (data ), on_bad_lines = "error" )
152168
@@ -156,9 +172,13 @@ def test_warn_bad_lines(all_parsers):
156172 parser = all_parsers
157173 data = "a\n 1\n 1,2,3\n 4\n 5,6,7"
158174 expected = DataFrame ({"a" : [1 , 4 ]})
175+ match_msg = "Skipping line"
176+
177+ if parser .engine == "pyarrow" :
178+ match_msg = "Expected 1 columns, but found 3: 1,2,3"
159179
160180 with tm .assert_produces_warning (
161- ParserWarning , match = "Skipping line" , check_stacklevel = False
181+ ParserWarning , match = match_msg , check_stacklevel = False
162182 ):
163183 result = parser .read_csv (StringIO (data ), on_bad_lines = "warn" )
164184 tm .assert_frame_equal (result , expected )
@@ -174,10 +194,14 @@ def test_read_csv_wrong_num_columns(all_parsers):
174194 parser = all_parsers
175195 msg = "Expected 6 fields in line 3, saw 7"
176196
197+ if parser .engine == "pyarrow" :
198+ msg = "Expected 6 columns, got 7: 6,7,8,9,10,11,12"
199+
177200 with pytest .raises (ParserError , match = msg ):
178201 parser .read_csv (StringIO (data ))
179202
180203
204+ @skip_pyarrow
181205def test_null_byte_char (request , all_parsers ):
182206 # see gh-2741
183207 data = "\x00 ,foo"
@@ -200,6 +224,7 @@ def test_null_byte_char(request, all_parsers):
200224 parser .read_csv (StringIO (data ), names = names )
201225
202226
227+ @skip_pyarrow
203228@pytest .mark .filterwarnings ("always::ResourceWarning" )
204229def test_open_file (request , all_parsers ):
205230 # GH 39024
@@ -238,6 +263,8 @@ def test_bad_header_uniform_error(all_parsers):
238263 "Could not construct index. Requested to use 1 "
239264 "number of columns, but 3 left to parse."
240265 )
266+ elif parser .engine == "pyarrow" :
267+ msg = "CSV parse error: Expected 1 columns, got 4: col1,col2,col3,col4"
241268
242269 with pytest .raises (ParserError , match = msg ):
243270 parser .read_csv (StringIO (data ), index_col = 0 , on_bad_lines = "error" )
@@ -253,9 +280,13 @@ def test_on_bad_lines_warn_correct_formatting(all_parsers):
253280a,b
254281"""
255282 expected = DataFrame ({"1" : "a" , "2" : ["b" ] * 2 })
283+ match_msg = "Skipping line"
284+
285+ if parser .engine == "pyarrow" :
286+ match_msg = "Expected 2 columns, but found 3: a,b,c"
256287
257288 with tm .assert_produces_warning (
258- ParserWarning , match = "Skipping line" , check_stacklevel = False
289+ ParserWarning , match = match_msg , check_stacklevel = False
259290 ):
260291 result = parser .read_csv (StringIO (data ), on_bad_lines = "warn" )
261292 tm .assert_frame_equal (result , expected )
0 commit comments