@@ -224,7 +224,7 @@ def compare(repeat):
             )

     if path is None:
-        path = str(temp_file)
+        path = temp_file
         compare(repeat)
     else:
         compare(repeat)
@@ -342,27 +342,25 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp, temp_file):
     # cross-compat with differing reading/writing engines

     df = df_cross_compat
-    path = str(temp_file)
-    df.to_parquet(path, engine=pa, compression=None)
+    df.to_parquet(temp_file, engine=pa, compression=None)

-    result = read_parquet(path, engine=fp)
+    result = read_parquet(temp_file, engine=fp)
     tm.assert_frame_equal(result, df)

-    result = read_parquet(path, engine=fp, columns=["a", "d"])
+    result = read_parquet(temp_file, engine=fp, columns=["a", "d"])
     tm.assert_frame_equal(result, df[["a", "d"]])


 def test_cross_engine_fp_pa(df_cross_compat, pa, fp, temp_file):
     # cross-compat with differing reading/writing engines
     df = df_cross_compat
-    path = str(temp_file)

-    df.to_parquet(path, engine=fp, compression=None)
+    df.to_parquet(temp_file, engine=fp, compression=None)

-    result = read_parquet(path, engine=pa)
+    result = read_parquet(temp_file, engine=pa)
     tm.assert_frame_equal(result, df)

-    result = read_parquet(path, engine=pa, columns=["a", "d"])
+    result = read_parquet(temp_file, engine=pa, columns=["a", "d"])
     tm.assert_frame_equal(result, df[["a", "d"]])


@@ -388,8 +386,7 @@ def test_error(self, engine, temp_file):
             np.array([1, 2, 3]),
         ]:
             msg = "to_parquet only supports IO with DataFrames"
-            path = str(temp_file)
-            self.check_error_on_write(obj, engine, ValueError, msg, path)
+            self.check_error_on_write(obj, engine, ValueError, msg, temp_file)

     def test_columns_dtypes(self, engine, temp_file):
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
@@ -535,10 +532,9 @@ def test_write_column_multiindex(self, engine, temp_file):
             np.random.default_rng(2).standard_normal((4, 3)), columns=mi_columns
         )

-        path = str(temp_file)
         if engine == "fastparquet":
             self.check_error_on_write(
-                df, engine, TypeError, "Column name must be a string", path
+                df, engine, TypeError, "Column name must be a string", temp_file
             )
         elif engine == "pyarrow":
             check_round_trip(df, temp_file, engine)
@@ -555,9 +551,8 @@ def test_write_column_multiindex_nonstring(self, engine, temp_file):
             np.random.default_rng(2).standard_normal((8, 8)), columns=arrays
         )
         df.columns.names = ["Level1", "Level2"]
-        path = str(temp_file)
         if engine == "fastparquet":
-            self.check_error_on_write(df, engine, ValueError, "Column name", path)
+            self.check_error_on_write(df, engine, ValueError, "Column name", temp_file)
         elif engine == "pyarrow":
             check_round_trip(df, temp_file, engine)

@@ -601,10 +596,9 @@ def test_write_column_index_nonstring(self, engine, temp_file):
             np.random.default_rng(2).standard_normal((8, 4)), columns=arrays
         )
         df.columns.name = "NonStringCol"
-        path = str(temp_file)
         if engine == "fastparquet":
             self.check_error_on_write(
-                df, engine, TypeError, "Column name must be a string", path
+                df, engine, TypeError, "Column name must be a string", temp_file
             )
         else:
             check_round_trip(df, temp_file, engine)
@@ -633,11 +627,10 @@ def test_dtype_backend(self, engine, request, temp_file):
                 "g": pyarrow.array([1.0, 2.0, 3.0, None], "float64"),
             }
         )
-        path = str(temp_file)
         # write manually with pyarrow to write integers
-        pq.write_table(table, path)
-        result1 = read_parquet(path, engine=engine)
-        result2 = read_parquet(path, engine=engine, dtype_backend="numpy_nullable")
+        pq.write_table(table, temp_file)
+        result1 = read_parquet(temp_file, engine=engine)
+        result2 = read_parquet(temp_file, engine=engine, dtype_backend="numpy_nullable")

         assert result1["a"].dtype == np.dtype("float64")
         expected = pd.DataFrame(
@@ -756,9 +749,8 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full):
     def test_duplicate_columns(self, pa, temp_file):
         # not currently able to handle duplicate columns
         df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy()
-        path = str(temp_file)
         self.check_error_on_write(
-            df, pa, ValueError, "Duplicate column names found", path
+            df, pa, ValueError, "Duplicate column names found", temp_file
         )

     def test_timedelta(self, pa, temp_file):
@@ -770,17 +762,17 @@ def test_unsupported(self, pa, temp_file):
         df = pd.DataFrame({"a": ["a", 1, 2.0]})
         # pyarrow 0.11 raises ArrowTypeError
         # older pyarrows raise ArrowInvalid
-        path = str(temp_file)
-        self.check_external_error_on_write(df, pa, pyarrow.ArrowException, path)
+        self.check_external_error_on_write(df, pa, pyarrow.ArrowException, temp_file)

     def test_unsupported_float16(self, pa, temp_file):
         # #44847, #44914
         # Not able to write float 16 column using pyarrow.
         data = np.arange(2, 10, dtype=np.float16)
         df = pd.DataFrame(data=data, columns=["fp16"])
-        path = str(temp_file)
         if pa_version_under15p0:
-            self.check_external_error_on_write(df, pa, pyarrow.ArrowException, path)
+            self.check_external_error_on_write(
+                df, pa, pyarrow.ArrowException, temp_file
+            )
         else:
             check_round_trip(df, temp_file, pa)

@@ -800,8 +792,7 @@ def test_unsupported_float16_cleanup(self, pa, path_type, temp_file):
         data = np.arange(2, 10, dtype=np.float16)
         df = pd.DataFrame(data=data, columns=["fp16"])

-        path_str = str(temp_file)
-        path = path_type(path_str)
+        path = path_type(temp_file)
         with tm.external_error_raised(pyarrow.ArrowException):
             df.to_parquet(path=path, engine=pa)
         assert not os.path.isfile(path)
@@ -1046,9 +1037,8 @@ def test_filter_row_groups(self, pa, temp_file):
         # https://github.com/pandas-dev/pandas/issues/26551
        pytest.importorskip("pyarrow")
         df = pd.DataFrame({"a": list(range(3))})
-        path = str(temp_file)
-        df.to_parquet(path, engine=pa)
-        result = read_parquet(path, pa, filters=[("a", "==", 0)])
+        df.to_parquet(temp_file, engine=pa)
+        result = read_parquet(temp_file, pa, filters=[("a", "==", 0)])
         assert len(result) == 1

     @pytest.mark.filterwarnings("ignore:make_block is deprecated:DeprecationWarning")
@@ -1251,29 +1241,27 @@ def test_columns_dtypes_invalid(self, fp, temp_file):

         err = TypeError
         msg = "Column name must be a string"
-        path = str(temp_file)

         # numeric
         df.columns = [0, 1]
-        self.check_error_on_write(df, fp, err, msg, path)
+        self.check_error_on_write(df, fp, err, msg, temp_file)

         # bytes
         df.columns = [b"foo", b"bar"]
-        self.check_error_on_write(df, fp, err, msg, path)
+        self.check_error_on_write(df, fp, err, msg, temp_file)

         # python object
         df.columns = [
             datetime.datetime(2011, 1, 1, 0, 0),
             datetime.datetime(2011, 1, 1, 1, 1),
         ]
-        self.check_error_on_write(df, fp, err, msg, path)
+        self.check_error_on_write(df, fp, err, msg, temp_file)

     def test_duplicate_columns(self, fp, temp_file):
         # not currently able to handle duplicate columns
         df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy()
         msg = "Cannot create parquet dataset with duplicate column names"
-        path = str(temp_file)
-        self.check_error_on_write(df, fp, ValueError, msg, path)
+        self.check_error_on_write(df, fp, ValueError, msg, temp_file)

     def test_bool_with_none(self, fp, request, temp_file):
         df = pd.DataFrame({"a": [True, None, False]})
@@ -1286,13 +1274,12 @@ def test_unsupported(self, fp, temp_file):
         # period
         df = pd.DataFrame({"a": pd.period_range("2013", freq="M", periods=3)})
         # error from fastparquet -> don't check exact error message
-        path = str(temp_file)
-        self.check_error_on_write(df, fp, ValueError, None, path)
+        self.check_error_on_write(df, fp, ValueError, None, temp_file)

         # mixed
         df = pd.DataFrame({"a": ["a", 1, 2.0]})
         msg = "Can't infer object conversion type"
-        self.check_error_on_write(df, fp, ValueError, msg, path)
+        self.check_error_on_write(df, fp, ValueError, msg, temp_file)

     def test_categorical(self, fp, temp_file):
         df = pd.DataFrame({"a": pd.Categorical(list("abc"))})
@@ -1301,9 +1288,8 @@ def test_filter_row_groups(self, fp, temp_file):
     def test_filter_row_groups(self, fp, temp_file):
         d = {"a": list(range(3))}
         df = pd.DataFrame(d)
-        path = str(temp_file)
-        df.to_parquet(path, engine=fp, compression=None, row_group_offsets=1)
-        result = read_parquet(path, fp, filters=[("a", "==", 0)])
+        df.to_parquet(temp_file, engine=fp, compression=None, row_group_offsets=1)
+        result = read_parquet(temp_file, fp, filters=[("a", "==", 0)])
         assert len(result) == 1

     @pytest.mark.single_cpu
@@ -1403,73 +1389,68 @@ def test_timezone_aware_index(
         check_round_trip(df, temp_file, fp, expected=expected)

     def test_close_file_handle_on_read_error(self, temp_file):
-        path = str(temp_file)
-        pathlib.Path(path).write_bytes(b"breakit")
+        pathlib.Path(temp_file).write_bytes(b"breakit")
         with tm.external_error_raised(Exception):  # Not important which exception
-            read_parquet(path, engine="fastparquet")
+            read_parquet(temp_file, engine="fastparquet")
         # The next line raises an error on Windows if the file is still open
-        pathlib.Path(path).unlink(missing_ok=False)
+        pathlib.Path(temp_file).unlink(missing_ok=False)

     def test_bytes_file_name(self, engine, temp_file):
         # GH#48944
         df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]})
-        path = str(temp_file)
-        with open(path.encode(), "wb") as f:
+        with open(temp_file, "wb") as f:
             df.to_parquet(f)

-        result = read_parquet(path, engine=engine)
+        result = read_parquet(temp_file, engine=engine)
         tm.assert_frame_equal(result, df)

     def test_filesystem_notimplemented(self, temp_file):
         pytest.importorskip("fastparquet")
         df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]})
-        path = str(temp_file)
         with pytest.raises(NotImplementedError, match="filesystem is not implemented"):
-            df.to_parquet(path, engine="fastparquet", filesystem="foo")
+            df.to_parquet(temp_file, engine="fastparquet", filesystem="foo")

-        pathlib.Path(path).write_bytes(b"foo")
+        pathlib.Path(temp_file).write_bytes(b"foo")
         with pytest.raises(NotImplementedError, match="filesystem is not implemented"):
-            read_parquet(path, engine="fastparquet", filesystem="foo")
+            read_parquet(temp_file, engine="fastparquet", filesystem="foo")

     def test_invalid_filesystem(self, temp_file):
         pytest.importorskip("pyarrow")
         df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]})
-        path = str(temp_file)

         with pytest.raises(
             ValueError, match="filesystem must be a pyarrow or fsspec FileSystem"
         ):
-            df.to_parquet(path, engine="pyarrow", filesystem="foo")
+            df.to_parquet(temp_file, engine="pyarrow", filesystem="foo")

-        pathlib.Path(path).write_bytes(b"foo")
+        pathlib.Path(temp_file).write_bytes(b"foo")
         with pytest.raises(
             ValueError, match="filesystem must be a pyarrow or fsspec FileSystem"
         ):
-            read_parquet(path, engine="pyarrow", filesystem="foo")
+            read_parquet(temp_file, engine="pyarrow", filesystem="foo")

     def test_unsupported_pa_filesystem_storage_options(self, temp_file):
         pa_fs = pytest.importorskip("pyarrow.fs")
         df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]})
-        path = str(temp_file)

         with pytest.raises(
             NotImplementedError,
             match="storage_options not supported with a pyarrow FileSystem.",
         ):
             df.to_parquet(
-                path,
+                temp_file,
                 engine="pyarrow",
                 filesystem=pa_fs.LocalFileSystem(),
                 storage_options={"foo": "bar"},
             )

-        pathlib.Path(path).write_bytes(b"foo")
+        pathlib.Path(temp_file).write_bytes(b"foo")
         with pytest.raises(
             NotImplementedError,
             match="storage_options not supported with a pyarrow FileSystem.",
         ):
             read_parquet(
-                path,
+                temp_file,
                 engine="pyarrow",
                 filesystem=pa_fs.LocalFileSystem(),
                 storage_options={"foo": "bar"},
@@ -1481,7 +1462,6 @@ def test_invalid_dtype_backend(self, engine, temp_file):
             "'pyarrow' are allowed."
         )
         df = pd.DataFrame({"int": list(range(1, 4))})
-        path = str(temp_file)
-        df.to_parquet(path)
+        df.to_parquet(temp_file)
         with pytest.raises(ValueError, match=msg):
-            read_parquet(path, dtype_backend="numpy")
+            read_parquet(temp_file, dtype_backend="numpy")
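The change works because DataFrame.to_parquet and read_parquet accept any path-like object (str or os.PathLike), so wrapping the fixture in str(...) was redundant. A minimal sketch of the pattern, using pytest's built-in tmp_path fixture rather than this suite's temp_file fixture:

    import pandas as pd
    import pandas.testing as tm

    def test_roundtrip_with_path_object(tmp_path):
        # tmp_path is a pathlib.Path; pandas parquet IO accepts it directly,
        # no str() conversion needed (requires pyarrow or fastparquet installed).
        df = pd.DataFrame({"a": [1, 2, 3]})
        target = tmp_path / "df.parquet"
        df.to_parquet(target)
        result = pd.read_parquet(target)
        tm.assert_frame_equal(result, df)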