@@ -86,12 +86,11 @@ def test_stringify_path_fspath(self):
8686 result = icom .stringify_path (p )
8787 assert result == "foo/bar.csv"
8888
89- def test_stringify_file_and_path_like (self ):
89+ def test_stringify_file_and_path_like (self , temp_file ):
9090 # GH 38125: do not stringify file objects that are also path-like
9191 fsspec = pytest .importorskip ("fsspec" )
92- with tm .ensure_clean () as path :
93- with fsspec .open (f"file://{ path } " , mode = "wb" ) as fsspec_obj :
94- assert fsspec_obj == icom .stringify_path (fsspec_obj )
92+ with fsspec .open (f"file://{ temp_file } " , mode = "wb" ) as fsspec_obj :
93+ assert fsspec_obj == icom .stringify_path (fsspec_obj )
9594
9695 @pytest .mark .parametrize ("path_type" , [str , CustomFSPath , Path ])
9796 def test_infer_compression_from_path (self , compression_format , path_type ):
@@ -338,49 +337,53 @@ def test_read_fspath_all(self, reader, module, path, datapath):
338337 ("to_stata" , {"time_stamp" : pd .to_datetime ("2019-01-01 00:00" )}, "os" ),
339338 ],
340339 )
341- def test_write_fspath_all (self , writer_name , writer_kwargs , module ):
340+ def test_write_fspath_all (self , writer_name , writer_kwargs , module , tmp_path ):
342341 if writer_name in ["to_latex" ]: # uses Styler implementation
343342 pytest .importorskip ("jinja2" )
344- p1 = tm . ensure_clean ( "string" )
345- p2 = tm . ensure_clean ( "fspath" )
343+ p1 = tmp_path / "string"
344+ p2 = tmp_path / "fspath"
346345 df = pd .DataFrame ({"A" : [1 , 2 ]})
347346
348- with p1 as string , p2 as fspath :
349- pytest .importorskip (module )
350- mypath = CustomFSPath (fspath )
351- writer = getattr (df , writer_name )
352-
353- writer (string , ** writer_kwargs )
354- writer (mypath , ** writer_kwargs )
355- with open (string , "rb" ) as f_str , open (fspath , "rb" ) as f_path :
356- if writer_name == "to_excel" :
357- # binary representation of excel contains time creation
358- # data that causes flaky CI failures
359- result = pd .read_excel (f_str , ** writer_kwargs )
360- expected = pd .read_excel (f_path , ** writer_kwargs )
361- tm .assert_frame_equal (result , expected )
362- else :
363- result = f_str .read ()
364- expected = f_path .read ()
365- assert result == expected
366-
367- def test_write_fspath_hdf5 (self ):
347+ string = str (p1 )
348+ fspath = str (p2 )
349+
350+ pytest .importorskip (module )
351+ mypath = CustomFSPath (fspath )
352+ writer = getattr (df , writer_name )
353+
354+ writer (string , ** writer_kwargs )
355+ writer (mypath , ** writer_kwargs )
356+ with open (string , "rb" ) as f_str , open (fspath , "rb" ) as f_path :
357+ if writer_name == "to_excel" :
358+ # binary representation of excel contains time creation
359+ # data that causes flaky CI failures
360+ result = pd .read_excel (f_str , ** writer_kwargs )
361+ expected = pd .read_excel (f_path , ** writer_kwargs )
362+ tm .assert_frame_equal (result , expected )
363+ else :
364+ result = f_str .read ()
365+ expected = f_path .read ()
366+ assert result == expected
367+
368+ def test_write_fspath_hdf5 (self , tmp_path ):
368369 # Same test as write_fspath_all, except HDF5 files aren't
369370 # necessarily byte-for-byte identical for a given dataframe, so we'll
370371 # have to read and compare equality
371372 pytest .importorskip ("tables" )
372373
373374 df = pd .DataFrame ({"A" : [1 , 2 ]})
374- p1 = tm .ensure_clean ("string" )
375- p2 = tm .ensure_clean ("fspath" )
375+ p1 = tmp_path / "string"
376+ p2 = tmp_path / "fspath"
377+
378+ string = str (p1 )
379+ fspath = str (p2 )
376380
377- with p1 as string , p2 as fspath :
378- mypath = CustomFSPath (fspath )
379- df .to_hdf (mypath , key = "bar" )
380- df .to_hdf (string , key = "bar" )
381+ mypath = CustomFSPath (fspath )
382+ df .to_hdf (mypath , key = "bar" )
383+ df .to_hdf (string , key = "bar" )
381384
382- result = pd .read_hdf (fspath , key = "bar" )
383- expected = pd .read_hdf (string , key = "bar" )
385+ result = pd .read_hdf (fspath , key = "bar" )
386+ expected = pd .read_hdf (string , key = "bar" )
384387
385388 tm .assert_frame_equal (result , expected )
386389
@@ -432,35 +435,35 @@ def test_next(self, mmap_file):
432435 with pytest .raises (StopIteration , match = r"^$" ):
433436 next (wrapper )
434437
435- def test_unknown_engine (self ):
436- with tm . ensure_clean () as path :
437- df = pd .DataFrame (
438- 1.1 * np .arange (120 ).reshape ((30 , 4 )),
439- columns = pd .Index (list ("ABCD" )),
440- index = pd .Index ([f"i-{ i } " for i in range (30 )]),
441- )
442- df .to_csv (path )
443- with pytest .raises (ValueError , match = "Unknown engine" ):
444- pd .read_csv (path , engine = "pyt" )
445-
446- def test_binary_mode (self ):
438+ def test_unknown_engine (self , temp_file ):
439+ path = temp_file
440+ df = pd .DataFrame (
441+ 1.1 * np .arange (120 ).reshape ((30 , 4 )),
442+ columns = pd .Index (list ("ABCD" )),
443+ index = pd .Index ([f"i-{ i } " for i in range (30 )]),
444+ )
445+ df .to_csv (path )
446+ with pytest .raises (ValueError , match = "Unknown engine" ):
447+ pd .read_csv (path , engine = "pyt" )
448+
449+ def test_binary_mode (self , temp_file ):
447450 """
448451 'encoding' shouldn't be passed to 'open' in binary mode.
449452
450453 GH 35058
451454 """
452- with tm . ensure_clean () as path :
453- df = pd .DataFrame (
454- 1.1 * np .arange (120 ).reshape ((30 , 4 )),
455- columns = pd .Index (list ("ABCD" )),
456- index = pd .Index ([f"i-{ i } " for i in range (30 )]),
457- )
458- df .to_csv (path , mode = "w+b" )
459- tm .assert_frame_equal (df , pd .read_csv (path , index_col = 0 ))
455+ path = temp_file
456+ df = pd .DataFrame (
457+ 1.1 * np .arange (120 ).reshape ((30 , 4 )),
458+ columns = pd .Index (list ("ABCD" )),
459+ index = pd .Index ([f"i-{ i } " for i in range (30 )]),
460+ )
461+ df .to_csv (path , mode = "w+b" )
462+ tm .assert_frame_equal (df , pd .read_csv (path , index_col = 0 ))
460463
461464 @pytest .mark .parametrize ("encoding" , ["utf-16" , "utf-32" ])
462465 @pytest .mark .parametrize ("compression_" , ["bz2" , "xz" ])
463- def test_warning_missing_utf_bom (self , encoding , compression_ ):
466+ def test_warning_missing_utf_bom (self , encoding , compression_ , temp_file ):
464467 """
465468 bz2 and xz do not write the byte order mark (BOM) for utf-16/32.
466469
@@ -473,17 +476,17 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
473476 columns = pd .Index (list ("ABCD" )),
474477 index = pd .Index ([f"i-{ i } " for i in range (30 )]),
475478 )
476- with tm . ensure_clean () as path :
477- with tm .assert_produces_warning (UnicodeWarning , match = "byte order mark" ):
478- df .to_csv (path , compression = compression_ , encoding = encoding )
479-
480- # reading should fail (otherwise we wouldn't need the warning)
481- msg = (
482- r"UTF-\d+ stream does not start with BOM|"
483- r"'utf-\d+' codec can't decode byte"
484- )
485- with pytest .raises (UnicodeError , match = msg ):
486- pd .read_csv (path , compression = compression_ , encoding = encoding )
479+ path = temp_file
480+ with tm .assert_produces_warning (UnicodeWarning , match = "byte order mark" ):
481+ df .to_csv (path , compression = compression_ , encoding = encoding )
482+
483+ # reading should fail (otherwise we wouldn't need the warning)
484+ msg = (
485+ r"UTF-\d+ stream does not start with BOM|"
486+ r"'utf-\d+' codec can't decode byte"
487+ )
488+ with pytest .raises (UnicodeError , match = msg ):
489+ pd .read_csv (path , compression = compression_ , encoding = encoding )
487490
488491
489492def test_is_fsspec_url ():
@@ -514,38 +517,39 @@ def test_is_fsspec_url_chained():
514517
515518
516519@pytest .mark .parametrize ("format" , ["csv" , "json" ])
517- def test_codecs_encoding (format ):
520+ def test_codecs_encoding (format , temp_file ):
518521 # GH39247
519522 expected = pd .DataFrame (
520523 1.1 * np .arange (120 ).reshape ((30 , 4 )),
521524 columns = pd .Index (list ("ABCD" )),
522525 index = pd .Index ([f"i-{ i } " for i in range (30 )]),
523526 )
524- with tm .ensure_clean () as path :
525- with open (path , mode = "w" , encoding = "utf-8" ) as handle :
526- getattr (expected , f"to_{ format } " )(handle )
527- with open (path , encoding = "utf-8" ) as handle :
528- if format == "csv" :
529- df = pd .read_csv (handle , index_col = 0 )
530- else :
531- df = pd .read_json (handle )
527+
528+ path = temp_file
529+ with open (path , mode = "w" , encoding = "utf-8" ) as handle :
530+ getattr (expected , f"to_{ format } " )(handle )
531+ with open (path , encoding = "utf-8" ) as handle :
532+ if format == "csv" :
533+ df = pd .read_csv (handle , index_col = 0 )
534+ else :
535+ df = pd .read_json (handle )
532536 tm .assert_frame_equal (expected , df )
533537
534538
535- def test_codecs_get_writer_reader ():
539+ def test_codecs_get_writer_reader (temp_file ):
536540 # GH39247
537541 expected = pd .DataFrame (
538542 1.1 * np .arange (120 ).reshape ((30 , 4 )),
539543 columns = pd .Index (list ("ABCD" )),
540544 index = pd .Index ([f"i-{ i } " for i in range (30 )]),
541545 )
542- with tm . ensure_clean () as path :
543- with open (path , "wb" ) as handle :
544- with codecs .getwriter ("utf-8" )(handle ) as encoded :
545- expected .to_csv (encoded )
546- with open (path , "rb" ) as handle :
547- with codecs .getreader ("utf-8" )(handle ) as encoded :
548- df = pd .read_csv (encoded , index_col = 0 )
546+ path = temp_file
547+ with open (path , "wb" ) as handle :
548+ with codecs .getwriter ("utf-8" )(handle ) as encoded :
549+ expected .to_csv (encoded )
550+ with open (path , "rb" ) as handle :
551+ with codecs .getreader ("utf-8" )(handle ) as encoded :
552+ df = pd .read_csv (encoded , index_col = 0 )
549553 tm .assert_frame_equal (expected , df )
550554
551555
@@ -572,7 +576,7 @@ def test_explicit_encoding(io_class, mode, msg):
572576
573577@pytest .mark .parametrize ("encoding_errors" , ["strict" , "replace" ])
574578@pytest .mark .parametrize ("format" , ["csv" , "json" ])
575- def test_encoding_errors (encoding_errors , format ):
579+ def test_encoding_errors (encoding_errors , format , temp_file ):
576580 # GH39450
577581 msg = "'utf-8' codec can't decode byte"
578582 bad_encoding = b"\xe4 "
@@ -591,18 +595,18 @@ def test_encoding_errors(encoding_errors, format):
591595 + b'"}}'
592596 )
593597 reader = partial (pd .read_json , orient = "index" )
594- with tm . ensure_clean () as path :
595- file = Path (path )
596- file .write_bytes (content )
598+ path = temp_file
599+ file = Path (path )
600+ file .write_bytes (content )
597601
598- if encoding_errors != "replace" :
599- with pytest .raises (UnicodeDecodeError , match = msg ):
600- reader (path , encoding_errors = encoding_errors )
601- else :
602- df = reader (path , encoding_errors = encoding_errors )
603- decoded = bad_encoding .decode (errors = encoding_errors )
604- expected = pd .DataFrame ({decoded : [decoded ]}, index = [decoded * 2 ])
605- tm .assert_frame_equal (df , expected )
602+ if encoding_errors != "replace" :
603+ with pytest .raises (UnicodeDecodeError , match = msg ):
604+ reader (path , encoding_errors = encoding_errors )
605+ else :
606+ df = reader (path , encoding_errors = encoding_errors )
607+ decoded = bad_encoding .decode (errors = encoding_errors )
608+ expected = pd .DataFrame ({decoded : [decoded ]}, index = [decoded * 2 ])
609+ tm .assert_frame_equal (df , expected )
606610
607611
608612@pytest .mark .parametrize ("encoding_errors" , [0 , None ])
@@ -616,11 +620,10 @@ def test_encoding_errors_badtype(encoding_errors):
616620 reader (content )
617621
618622
619- def test_bad_encdoing_errors ():
623+ def test_bad_encdoing_errors (temp_file ):
620624 # GH 39777
621- with tm .ensure_clean () as path :
622- with pytest .raises (LookupError , match = "unknown error handler name" ):
623- icom .get_handle (path , "w" , errors = "bad" )
625+ with pytest .raises (LookupError , match = "unknown error handler name" ):
626+ icom .get_handle (temp_file , "w" , errors = "bad" )
624627
625628
626629@pytest .mark .skipif (WASM , reason = "limited file system access on WASM" )