@@ -147,7 +147,9 @@ def test_unsupported_dtype(c_parser_only, match, kwargs):
 
 @td.skip_if_32bit
 @pytest.mark.slow
-def test_precise_conversion(c_parser_only):
+# test numbers between 1 and 2
+@pytest.mark.parametrize("num", np.linspace(1.0, 2.0, num=21))
+def test_precise_conversion(c_parser_only, num):
     parser = c_parser_only
 
     normal_errors = []
@@ -156,27 +158,23 @@ def test_precise_conversion(c_parser_only):
     def error(val: float, actual_val: Decimal) -> Decimal:
         return abs(Decimal(f"{val:.100}") - actual_val)
 
-    # test numbers between 1 and 2
-    for num in np.linspace(1.0, 2.0, num=500):
-        # 25 decimal digits of precision
-        text = f"a\n{num:.25}"
+    # 25 decimal digits of precision
+    text = f"a\n{num:.25}"
 
-        normal_val = float(
-            parser.read_csv(StringIO(text), float_precision="legacy")["a"][0]
-        )
-        precise_val = float(
-            parser.read_csv(StringIO(text), float_precision="high")["a"][0]
-        )
-        roundtrip_val = float(
-            parser.read_csv(StringIO(text), float_precision="round_trip")["a"][0]
-        )
-        actual_val = Decimal(text[2:])
+    normal_val = float(
+        parser.read_csv(StringIO(text), float_precision="legacy")["a"][0]
+    )
+    precise_val = float(parser.read_csv(StringIO(text), float_precision="high")["a"][0])
+    roundtrip_val = float(
+        parser.read_csv(StringIO(text), float_precision="round_trip")["a"][0]
+    )
+    actual_val = Decimal(text[2:])
 
-        normal_errors.append(error(normal_val, actual_val))
-        precise_errors.append(error(precise_val, actual_val))
+    normal_errors.append(error(normal_val, actual_val))
+    precise_errors.append(error(precise_val, actual_val))
 
-        # round-trip should match float()
-        assert roundtrip_val == float(text[2:])
+    # round-trip should match float()
+    assert roundtrip_val == float(text[2:])
 
     assert sum(precise_errors) <= sum(normal_errors)
     assert max(precise_errors) <= max(normal_errors)
@@ -287,7 +285,8 @@ def test_tokenize_CR_with_quoting(c_parser_only):
 
 
 @pytest.mark.slow
-def test_grow_boundary_at_cap(c_parser_only):
+@pytest.mark.parametrize("count", [3 * 2**n for n in range(6)])
+def test_grow_boundary_at_cap(c_parser_only, count):
     # See gh-12494
     #
     # Cause of error was that the C parser
@@ -296,19 +295,18 @@ def test_grow_boundary_at_cap(c_parser_only):
     # to capacity, which would later cause a
     # buffer overflow error when checking the
     # EOF terminator of the CSV stream.
+    # 3 * 2^n commas was observed to break the parser
     parser = c_parser_only
 
-    def test_empty_header_read(count):
-        with StringIO("," * count) as s:
-            expected = DataFrame(columns=[f"Unnamed: {i}" for i in range(count + 1)])
-            df = parser.read_csv(s)
-            tm.assert_frame_equal(df, expected)
-
-    for cnt in range(1, 101):
-        test_empty_header_read(cnt)
+    with StringIO("," * count) as s:
+        expected = DataFrame(columns=[f"Unnamed: {i}" for i in range(count + 1)])
+        df = parser.read_csv(s)
+        tm.assert_frame_equal(df, expected)
 
 
-def test_parse_trim_buffers(c_parser_only):
+@pytest.mark.slow
+@pytest.mark.parametrize("encoding", [None, "utf-8"])
+def test_parse_trim_buffers(c_parser_only, encoding):
     # This test is part of a bugfix for gh-13703. It attempts to
     # to stress the system memory allocator, to cause it to move the
     # stream buffer and either let the OS reclaim the region, or let
@@ -319,6 +317,9 @@ def test_parse_trim_buffers(c_parser_only):
     # times it fails due to memory corruption, which causes the
     # loaded DataFrame to differ from the expected one.
 
+    # Also force 'utf-8' encoding, so that `_string_convert` would take
+    # a different execution branch.
+
    parser = c_parser_only
 
     # Generate a large mixed-type CSV file on-the-fly (one record is
@@ -374,25 +375,16 @@ def test_parse_trim_buffers(c_parser_only):
     )
 
     # Iterate over the CSV file in chunks of `chunksize` lines
-    with parser.read_csv(
-        StringIO(csv_data), header=None, dtype=object, chunksize=chunksize
-    ) as chunks_:
-        result = concat(chunks_, axis=0, ignore_index=True)
-
-    # Check for data corruption if there was no segfault
-    tm.assert_frame_equal(result, expected)
-
-    # This extra test was added to replicate the fault in gh-5291.
-    # Force 'utf-8' encoding, so that `_string_convert` would take
-    # a different execution branch.
     with parser.read_csv(
         StringIO(csv_data),
         header=None,
         dtype=object,
         chunksize=chunksize,
-        encoding="utf_8",
+        encoding=encoding,
     ) as chunks_:
         result = concat(chunks_, axis=0, ignore_index=True)
+
+    # Check for data corruption if there was no segfault
     tm.assert_frame_equal(result, expected)
 
 
0 commit comments