@@ -105,7 +105,7 @@ def test_options_py(df_compat, pa):
105105 with pd .option_context ('io.parquet.engine' , 'pyarrow' ):
106106 df .to_parquet (path )
107107
108- result = read_parquet (path , compression = None )
108+ result = read_parquet (path )
109109 tm .assert_frame_equal (result , df )
110110
111111
@@ -118,7 +118,7 @@ def test_options_fp(df_compat, fp):
118118 with pd .option_context ('io.parquet.engine' , 'fastparquet' ):
119119 df .to_parquet (path , compression = None )
120120
121- result = read_parquet (path , compression = None )
121+ result = read_parquet (path )
122122 tm .assert_frame_equal (result , df )
123123
124124
@@ -130,7 +130,7 @@ def test_options_auto(df_compat, fp, pa):
130130 with pd .option_context ('io.parquet.engine' , 'auto' ):
131131 df .to_parquet (path )
132132
133- result = read_parquet (path , compression = None )
133+ result = read_parquet (path )
134134 tm .assert_frame_equal (result , df )
135135
136136
@@ -162,7 +162,7 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
162162 with tm .ensure_clean () as path :
163163 df .to_parquet (path , engine = pa , compression = None )
164164
165- result = read_parquet (path , engine = fp , compression = None )
165+ result = read_parquet (path , engine = fp )
166166 tm .assert_frame_equal (result , df )
167167
168168
@@ -174,7 +174,7 @@ def test_cross_engine_fp_pa(df_cross_compat, pa, fp):
174174 with tm .ensure_clean () as path :
175175 df .to_parquet (path , engine = fp , compression = None )
176176
177- result = read_parquet (path , engine = pa , compression = None )
177+ result = read_parquet (path , engine = pa )
178178 tm .assert_frame_equal (result , df )
179179
180180
@@ -188,19 +188,23 @@ def check_error_on_write(self, df, engine, exc):
188188 with tm .ensure_clean () as path :
189189 to_parquet (df , path , engine , compression = None )
190190
191- def check_round_trip (self , df , engine , expected = None , ** kwargs ):
192-
191+ def check_round_trip (self , df , engine , expected = None ,
192+ write_kwargs = None , read_kwargs = None ):
193+ if write_kwargs is None :
194+ write_kwargs = {}
195+ if read_kwargs is None :
196+ read_kwargs = {}
193197 with tm .ensure_clean () as path :
194- df .to_parquet (path , engine , ** kwargs )
195- result = read_parquet (path , engine , ** kwargs )
198+ df .to_parquet (path , engine , ** write_kwargs )
199+ result = read_parquet (path , engine , ** read_kwargs )
196200
197201 if expected is None :
198202 expected = df
199203 tm .assert_frame_equal (result , expected )
200204
201205 # repeat
202- to_parquet (df , path , engine , ** kwargs )
203- result = pd .read_parquet (path , engine , ** kwargs )
206+ to_parquet (df , path , engine , ** write_kwargs )
207+ result = pd .read_parquet (path , engine , ** read_kwargs )
204208
205209 if expected is None :
206210 expected = df
@@ -222,7 +226,7 @@ def test_columns_dtypes(self, engine):
222226
223227 # unicode
224228 df .columns = [u'foo' , u'bar' ]
225- self .check_round_trip (df , engine , compression = None )
229+ self .check_round_trip (df , engine , write_kwargs = { 'compression' : None } )
226230
227231 def test_columns_dtypes_invalid (self , engine ):
228232
@@ -246,7 +250,7 @@ def test_columns_dtypes_invalid(self, engine):
246250 def test_write_with_index (self , engine ):
247251
248252 df = pd .DataFrame ({'A' : [1 , 2 , 3 ]})
249- self .check_round_trip (df , engine , compression = None )
253+ self .check_round_trip (df , engine , write_kwargs = { 'compression' : None } )
250254
251255 # non-default index
252256 for index in [[2 , 3 , 4 ],
@@ -280,7 +284,8 @@ def test_compression(self, engine, compression):
280284 pytest .importorskip ('brotli' )
281285
282286 df = pd .DataFrame ({'A' : [1 , 2 , 3 ]})
283- self .check_round_trip (df , engine , compression = compression )
287+ self .check_round_trip (df , engine ,
288+ write_kwargs = {'compression' : compression })
284289
285290 def test_read_columns (self , engine ):
286291 # GH18154
@@ -289,7 +294,8 @@ def test_read_columns(self, engine):
289294
290295 expected = pd .DataFrame ({'string' : list ('abc' )})
291296 self .check_round_trip (df , engine , expected = expected ,
292- compression = None , columns = ["string" ])
297+ write_kwargs = {'compression' : None },
298+ read_kwargs = {'columns' : ['string' ]})
293299
294300
295301class TestParquetPyArrow (Base ):
@@ -377,7 +383,7 @@ def test_basic(self, fp):
377383 'timedelta' : pd .timedelta_range ('1 day' , periods = 3 ),
378384 })
379385
380- self .check_round_trip (df , fp , compression = None )
386+ self .check_round_trip (df , fp , write_kwargs = { 'compression' : None } )
381387
382388 @pytest .mark .skip (reason = "not supported" )
383389 def test_duplicate_columns (self , fp ):
@@ -390,7 +396,8 @@ def test_duplicate_columns(self, fp):
390396 def test_bool_with_none (self , fp ):
391397 df = pd .DataFrame ({'a' : [True , None , False ]})
392398 expected = pd .DataFrame ({'a' : [1.0 , np .nan , 0.0 ]}, dtype = 'float16' )
393- self .check_round_trip (df , fp , expected = expected , compression = None )
399+ self .check_round_trip (df , fp , expected = expected ,
400+ write_kwargs = {'compression' : None })
394401
395402 def test_unsupported (self , fp ):
396403
@@ -406,7 +413,7 @@ def test_categorical(self, fp):
406413 if LooseVersion (fastparquet .__version__ ) < LooseVersion ("0.1.3" ):
407414 pytest .skip ("CategoricalDtype not supported for older fp" )
408415 df = pd .DataFrame ({'a' : pd .Categorical (list ('abc' ))})
409- self .check_round_trip (df , fp , compression = None )
416+ self .check_round_trip (df , fp , write_kwargs = { 'compression' : None } )
410417
411418 def test_datetime_tz (self , fp ):
412419 # doesn't preserve tz
@@ -416,4 +423,13 @@ def test_datetime_tz(self, fp):
416423 # warns on the coercion
417424 with catch_warnings (record = True ):
418425 self .check_round_trip (df , fp , df .astype ('datetime64[ns]' ),
419- compression = None )
426+ write_kwargs = {'compression' : None })
427+
428+ def test_filter_row_groups (self , fp ):
429+ d = {'a' : list (range (0 , 3 ))}
430+ df = pd .DataFrame (d )
431+ with tm .ensure_clean () as path :
432+ df .to_parquet (path , fp , compression = None ,
433+ row_group_offsets = 1 )
434+ result = read_parquet (path , fp , filters = [('a' , '==' , 0 )])
435+ assert len (result ) == 1