22
33import pytest
44
5- from pandas .errors import ParserWarning
5+ from pandas .errors import DtypeWarning
66
77import pandas ._testing as tm
88
@@ -22,7 +22,7 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
2222
2323 if engine_name == "pyarrow" :
2424 with tm .assert_produces_warning (
25- ParserWarning , match = "pyarrow engine expects a dict mapping "
25+ DtypeWarning , match = "not supported with pyarrow engine "
2626 ):
2727 result = parser .read_csv (
2828 StringIO (data ),
@@ -53,7 +53,7 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
5353 raise
5454
5555
56- def test_leading_zeros_preserved_with_dtype_dict_str_only (all_parsers ):
56+ def test_leading_zeros_preserved_with_dtype_dict (all_parsers ):
5757 # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
5858 # GH#61618: further discussion on ensuring string dtype preservation across engines
5959
@@ -67,7 +67,7 @@ def test_leading_zeros_preserved_with_dtype_dict_str_only(all_parsers):
6767
6868 result = parser .read_csv (
6969 StringIO (data ),
70- dtype = {"col2" : str , "col4" : str },
70+ dtype = {"col2" : str , "col3" : int , " col4" : str },
7171 )
7272
7373 assert result .shape == (4 , 4 )
@@ -82,44 +82,3 @@ def test_leading_zeros_preserved_with_dtype_dict_str_only(all_parsers):
8282 assert result .loc [1 , "col3" ] == 200
8383 assert result .loc [2 , "col3" ] == 201
8484 assert result .loc [3 , "col3" ] == 202
85-
86-
87- def test_leading_zeros_preserved_with_heterogeneous_dtypes (all_parsers ):
88- # GH#57666: pyarrow engine strips leading zeros when dtype=str is passed
89- # GH#61618: further discussion on ensuring string dtype preservation across engines
90-
91- parser = all_parsers
92- engine_name = getattr (parser , "engine" , "unknown" )
93-
94- data = """col1,col2,col3,col4
95- AB,000388907,199,0150
96- CD,101044572,200,0150
97- EF,000023607,201,0205
98- GH,100102040,202,0205"""
99-
100- if engine_name == "pyarrow" :
101- with tm .assert_produces_warning (
102- ParserWarning , match = "may not be handled correctly by the pyarrow engine"
103- ):
104- result = parser .read_csv (
105- StringIO (data ),
106- dtype = {"col2" : str , "col3" : int , "col4" : str },
107- )
108- else :
109- result = parser .read_csv (
110- StringIO (data ),
111- dtype = {"col2" : str , "col3" : int , "col4" : str },
112- )
113-
114- assert result .shape == (4 , 4 )
115- assert list (result .columns ) == ["col1" , "col2" , "col3" , "col4" ]
116-
117- assert result .loc [0 , "col2" ] == "000388907" , "lost zeros in col2 row 0"
118- assert result .loc [2 , "col2" ] == "000023607" , "lost zeros in col2 row 2"
119- assert result .loc [0 , "col4" ] == "0150" , "lost zeros in col4 row 0"
120- assert result .loc [2 , "col4" ] == "0205" , "lost zeros in col4 row 2"
121-
122- assert result .loc [0 , "col3" ] == 199
123- assert result .loc [1 , "col3" ] == 200
124- assert result .loc [2 , "col3" ] == 201
125- assert result .loc [3 , "col3" ] == 202
0 commit comments