11import gzip
22import io
33import os
4- from pathlib import Path
54import subprocess
65import sys
76import tarfile
3130 ],
3231)
3332@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
def test_compression_size(obj, method, compression_only, temp_file):
    """Check that a compressed write is smaller on disk than an uncompressed one.

    ``obj`` is written twice to ``temp_file`` through the writer named by
    ``method``: first with ``compression_only``, then with
    ``compression=None``. The two resulting file sizes are compared.
    """
    if compression_only == "tar":
        # Use mode "w:gz" so the tar archive itself is gzip-compressed.
        compression_only = {"method": "tar", "mode": "w:gz"}

    path = temp_file
    getattr(obj, method)(path, compression=compression_only)
    compressed_size = os.path.getsize(path)
    # The second write reuses the same path with compression disabled.
    getattr(obj, method)(path, compression=None)
    uncompressed_size = os.path.getsize(path)
    assert uncompressed_size > compressed_size
4443
4544
4645@pytest .mark .parametrize (
@@ -54,22 +53,25 @@ def test_compression_size(obj, method, compression_only):
5453 ],
5554)
5655@pytest .mark .parametrize ("method" , ["to_csv" , "to_json" ])
def test_compression_size_fh(obj, method, compression_only, temp_file):
    """Check compressed vs. uncompressed size when writing through a handle.

    ``obj`` is written with the writer named by ``method`` into a handle
    opened by ``icom.get_handle``: once with ``compression_only`` on
    ``temp_file`` and once with ``compression=None`` on a sibling file.
    Each handle must still be open after the write, and the compressed
    file must be the smaller of the two.
    """
    path = temp_file
    with icom.get_handle(
        path,
        "w:gz" if compression_only == "tar" else "w",
        compression=compression_only,
    ) as handles:
        getattr(obj, method)(handles.handle)
        # The writer must not close a handle it did not open.
        assert not handles.handle.closed
    # Size is taken after the ``with`` block, i.e. once the handle is closed.
    compressed_size = os.path.getsize(path)

    # Create a new temporary file for uncompressed comparison
    path2 = temp_file.parent / f"{temp_file.stem}_uncompressed{temp_file.suffix}"
    path2.touch()
    with icom.get_handle(path2, "w", compression=None) as handles:
        getattr(obj, method)(handles.handle)
        assert not handles.handle.closed
    uncompressed_size = os.path.getsize(path2)
    assert uncompressed_size > compressed_size
7375
7476
7577@pytest .mark .parametrize (
@@ -81,14 +83,19 @@ def test_compression_size_fh(obj, method, compression_only):
8183 ],
8284)
def test_dataframe_compression_defaults_to_infer(
    write_method,
    write_kwargs,
    read_method,
    compression_only,
    compression_to_extension,
    temp_file,
):
    """Round-trip a DataFrame relying on the writer's default compression.

    The frame is written to ``compressed<extension>`` next to ``temp_file``
    without passing ``compression``; it is then read back with
    ``compression_only`` given explicitly and compared to the original.
    """
    # GH22004
    expected = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=["X", "Y", "Z"])
    extension = compression_to_extension[compression_only]
    # The file name must end exactly with the extension (no stray spaces),
    # since no explicit compression is passed to the writer.
    path = temp_file.parent / f"compressed{extension}"
    getattr(expected, write_method)(path, **write_kwargs)
    result = read_method(path, compression=compression_only)
    tm.assert_frame_equal(result, expected)
93100
94101
@@ -107,37 +114,38 @@ def test_series_compression_defaults_to_infer(
107114 read_kwargs ,
108115 compression_only ,
109116 compression_to_extension ,
117+ temp_file ,
110118):
111119 # GH22004
112120 input = pd .Series ([0 , 5 , - 2 , 10 ], name = "X" )
113121 extension = compression_to_extension [compression_only ]
114- with tm . ensure_clean ( "compressed" + extension ) as path :
115- getattr (input , write_method )(path , ** write_kwargs )
116- if "squeeze" in read_kwargs :
117- kwargs = read_kwargs .copy ()
118- del kwargs ["squeeze" ]
119- output = read_method (path , compression = compression_only , ** kwargs ).squeeze (
120- "columns"
121- )
122- else :
123- output = read_method (path , compression = compression_only , ** read_kwargs )
122+ path = temp_file . parent / f"compressed { extension } "
123+ getattr (input , write_method )(path , ** write_kwargs )
124+ if "squeeze" in read_kwargs :
125+ kwargs = read_kwargs .copy ()
126+ del kwargs ["squeeze" ]
127+ output = read_method (path , compression = compression_only , ** kwargs ).squeeze (
128+ "columns"
129+ )
130+ else :
131+ output = read_method (path , compression = compression_only , ** read_kwargs )
124132 tm .assert_series_equal (output , input , check_names = False )
125133
126134
def test_compression_warning(compression_only, temp_file):
    """Check that ``to_csv`` warns when compression is given with a handle."""
    # Assert that passing a file object to to_csv while explicitly specifying a
    # compression protocol triggers a RuntimeWarning, as per GH21227.
    df = pd.DataFrame(
        100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        columns=["X", "Y", "Z"],
    )
    path = temp_file
    with icom.get_handle(path, "w", compression=compression_only) as handles:
        with tm.assert_produces_warning(RuntimeWarning, match="has no effect"):
            df.to_csv(handles.handle, compression=compression_only)
138146
139147
140- def test_compression_binary (compression_only ):
148+ def test_compression_binary (compression_only , temp_file ):
141149 """
142150 Binary file handles support compression.
143151
@@ -150,13 +158,13 @@ def test_compression_binary(compression_only):
150158 )
151159
152160 # with a file
153- with tm . ensure_clean () as path :
154- with open (path , mode = "wb" ) as file :
155- df .to_csv (file , mode = "wb" , compression = compression_only )
156- file .seek (0 ) # file shouldn't be closed
157- tm .assert_frame_equal (
158- df , pd .read_csv (path , index_col = 0 , compression = compression_only )
159- )
161+ path = temp_file
162+ with open (path , mode = "wb" ) as file :
163+ df .to_csv (file , mode = "wb" , compression = compression_only )
164+ file .seek (0 ) # file shouldn't be closed
165+ tm .assert_frame_equal (
166+ df , pd .read_csv (path , index_col = 0 , compression = compression_only )
167+ )
160168
161169 # with BytesIO
162170 file = io .BytesIO ()
@@ -167,7 +175,7 @@ def test_compression_binary(compression_only):
167175 )
168176
169177
170- def test_gzip_reproducibility_file_name ():
178+ def test_gzip_reproducibility_file_name (temp_file ):
171179 """
172180 Gzip should create reproducible archives with mtime.
173181
@@ -183,13 +191,12 @@ def test_gzip_reproducibility_file_name():
183191 compression_options = {"method" : "gzip" , "mtime" : 1 }
184192
185193 # test for filename
186- with tm .ensure_clean () as path :
187- path = Path (path )
188- df .to_csv (path , compression = compression_options )
189- time .sleep (0.1 )
190- output = path .read_bytes ()
191- df .to_csv (path , compression = compression_options )
192- assert output == path .read_bytes ()
194+ path = temp_file
195+ df .to_csv (path , compression = compression_options )
196+ time .sleep (0.1 )
197+ output = path .read_bytes ()
198+ df .to_csv (path , compression = compression_options )
199+ assert output == path .read_bytes ()
193200
194201
195202def test_gzip_reproducibility_file_object ():
@@ -259,14 +266,14 @@ def test_with_missing_lzma_runtime():
259266 ],
260267)
261268@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
def test_gzip_compression_level(obj, method, temp_file):
    """Check that gzip ``compresslevel=1`` gives a larger file than the default.

    ``obj`` is written twice to ``temp_file`` with the writer named by
    ``method``: once with ``compression="gzip"`` and once with a dict
    requesting ``compresslevel=1``.
    """
    # GH33196
    path = temp_file
    getattr(obj, method)(path, compression="gzip")
    compressed_size_default = os.path.getsize(path)
    getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1})
    compressed_size_fast = os.path.getsize(path)
    assert compressed_size_default < compressed_size_fast
270277
271278
272279@pytest .mark .parametrize (
@@ -280,15 +287,15 @@ def test_gzip_compression_level(obj, method):
280287 ],
281288)
282289@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
def test_xz_compression_level_read(obj, method, temp_file):
    """Check that xz ``preset=1`` gives a larger file than the default.

    ``obj`` is written twice to ``temp_file`` with the writer named by
    ``method``: once with ``compression="xz"`` and once with a dict
    requesting ``preset=1``. For ``to_csv`` the last file written is
    also read back with ``pd.read_csv``.
    """
    path = temp_file
    getattr(obj, method)(path, compression="xz")
    compressed_size_default = os.path.getsize(path)
    getattr(obj, method)(path, compression={"method": "xz", "preset": 1})
    compressed_size_fast = os.path.getsize(path)
    assert compressed_size_default < compressed_size_fast
    if method == "to_csv":
        # Only checks that reading succeeds; the result is not inspected.
        pd.read_csv(path, compression="xz")
292299
293300
294301@pytest .mark .parametrize (
@@ -302,13 +309,13 @@ def test_xz_compression_level_read(obj, method):
302309 ],
303310)
304311@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
def test_bzip_compression_level(obj, method, temp_file):
    """GH33196 bzip needs file size > 100k to show a size difference between
    compression levels, so here we just check if the call works when
    compression is passed as a dict.
    """
    path = temp_file
    getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})
312319
313320
314321@pytest .mark .parametrize (
@@ -318,21 +325,21 @@ def test_bzip_compression_level(obj, method):
318325 (".tar" , tarfile .TarFile ),
319326 ],
320327)
def test_empty_archive_zip(suffix, archive, temp_file):
    """Check that reading an archive with no members raises ``ValueError``.

    An empty archive named ``archive<suffix>`` is created next to
    ``temp_file`` using the ``archive`` class, then passed to
    ``pd.read_csv``.
    """
    path = temp_file.parent / f"archive{suffix}"
    # Opening for write and closing immediately leaves an archive with no files.
    with archive(path, "w"):
        pass
    with pytest.raises(ValueError, match="Zero files found"):
        pd.read_csv(path)
327334
328335
def test_ambiguous_archive_zip(temp_file):
    """Check that reading a ZIP holding two files raises ``ValueError``.

    A ZIP archive with members ``a.csv`` and ``b.csv`` is created next to
    ``temp_file`` and passed to ``pd.read_csv``.
    """
    path = temp_file.parent / "archive.zip"
    with zipfile.ZipFile(path, "w") as file:
        file.writestr("a.csv", "foo,bar")
        file.writestr("b.csv", "foo,bar")
    with pytest.raises(ValueError, match="Multiple files found in ZIP file"):
        pd.read_csv(path)
336343
337344
338345def test_ambiguous_archive_tar (tmp_path ):
@@ -352,24 +359,24 @@ def test_ambiguous_archive_tar(tmp_path):
352359 pd .read_csv (tarpath )
353360
354361
def test_tar_gz_to_different_filename(temp_file):
    """Check a gzip-compressed tar written to a path with an unrelated suffix.

    A one-row frame is written with ``to_csv`` using tar compression in
    ``"w:gz"`` mode to ``archive.foo`` next to ``temp_file``. The file is
    then opened with ``gzip`` and ``tarfile`` directly, and the single
    member's content is compared against the expected CSV text.
    """
    file = temp_file.parent / "archive.foo"
    pd.DataFrame(
        [["1", "2"]],
        columns=["foo", "bar"],
    ).to_csv(file, compression={"method": "tar", "mode": "w:gz"}, index=False)
    with gzip.open(file) as uncompressed:
        with tarfile.TarFile(fileobj=uncompressed) as archive:
            members = archive.getmembers()
            assert len(members) == 1
            content = archive.extractfile(members[0]).read().decode("utf8")

    # Line terminators in the expected CSV text differ on Windows.
    if is_platform_windows():
        expected = "foo,bar\r\n1,2\r\n"
    else:
        expected = "foo,bar\n1,2\n"

    assert content == expected
373380
374381
375382def test_tar_no_error_on_close ():
0 commit comments