88import numpy as np
99import pytest
1010
11- from pandas ._config import using_string_dtype
12-
1311from pandas ._libs .parsers import STR_NA_VALUES
1412
1513from pandas import (
@@ -261,7 +259,6 @@ def test_na_value_dict_multi_index(all_parsers, index_col, expected):
261259 tm .assert_frame_equal (result , expected )
262260
263261
264- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
265262@pytest .mark .parametrize (
266263 "kwargs,expected" ,
267264 [
@@ -299,7 +296,9 @@ def test_na_value_dict_multi_index(all_parsers, index_col, expected):
299296 ),
300297 ],
301298)
302- def test_na_values_keep_default (all_parsers , kwargs , expected , request ):
299+ def test_na_values_keep_default (
300+ all_parsers , kwargs , expected , request , using_infer_string
301+ ):
303302 data = """\
304303 A,B,C
305304a,1,one
@@ -317,8 +316,9 @@ def test_na_values_keep_default(all_parsers, kwargs, expected, request):
317316 with pytest .raises (ValueError , match = msg ):
318317 parser .read_csv (StringIO (data ), ** kwargs )
319318 return
320- mark = pytest .mark .xfail ()
321- request .applymarker (mark )
319+ if not using_infer_string or "na_values" in kwargs :
320+ mark = pytest .mark .xfail ()
321+ request .applymarker (mark )
322322
323323 result = parser .read_csv (StringIO (data ), ** kwargs )
324324 expected = DataFrame (expected )
@@ -429,23 +429,28 @@ def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers, col_zero_na_v
429429 tm .assert_frame_equal (result , expected )
430430
431431
432- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
433- @xfail_pyarrow # mismatched dtypes in both cases, FutureWarning in the True case
434432@pytest .mark .parametrize (
435433 "na_filter,row_data" ,
436434 [
437435 (True , [[1 , "A" ], [np .nan , np .nan ], [3 , "C" ]]),
438436 (False , [["1" , "A" ], ["nan" , "B" ], ["3" , "C" ]]),
439437 ],
440438)
441- def test_na_values_na_filter_override (all_parsers , na_filter , row_data ):
439+ def test_na_values_na_filter_override (
440+ request , all_parsers , na_filter , row_data , using_infer_string
441+ ):
442+ parser = all_parsers
443+ if parser .engine == "pyarrow" :
444+ # mismatched dtypes in both cases, FutureWarning in the True case
445+ if not (using_infer_string and na_filter ):
446+ mark = pytest .mark .xfail (reason = "pyarrow doesn't support this." )
447+ request .applymarker (mark )
442448 data = """\
443449 A,B
4444501,A
445451nan,B
4464523,C
447453"""
448- parser = all_parsers
449454 result = parser .read_csv (StringIO (data ), na_values = ["B" ], na_filter = na_filter )
450455
451456 expected = DataFrame (row_data , columns = ["A" , "B" ])
@@ -536,7 +541,6 @@ def test_na_values_dict_aliasing(all_parsers):
536541 tm .assert_dict_equal (na_values , na_values_copy )
537542
538543
539- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
540544def test_na_values_dict_null_column_name (all_parsers ):
541545 # see gh-57547
542546 parser = all_parsers
@@ -560,11 +564,10 @@ def test_na_values_dict_null_column_name(all_parsers):
560564 return
561565
562566 expected = DataFrame (
563- {None : ["MA" , "NA" , "OA" ], "x" : [1.0 , 2.0 , np .nan ], "y" : [2.0 , 1.0 , 3.0 ]}
567+ {"x" : [1.0 , 2.0 , np .nan ], "y" : [2.0 , 1.0 , 3.0 ]},
568+ index = Index (["MA" , "NA" , "OA" ], dtype = object ),
564569 )
565570
566- expected = expected .set_index (None )
567-
568571 result = parser .read_csv (
569572 StringIO (data ),
570573 index_col = 0 ,
0 commit comments