@@ -228,6 +228,7 @@ def read(
228228 self ,
229229 path ,
230230 columns = None ,
231+ filters = None ,
231232 use_nullable_dtypes : bool = False ,
232233 dtype_backend : DtypeBackend | lib .NoDefault = lib .no_default ,
233234 storage_options : StorageOptions | None = None ,
@@ -257,7 +258,11 @@ def read(
257258 )
258259 try :
259260 pa_table = self .api .parquet .read_table (
260- path_or_handle , columns = columns , filesystem = filesystem , ** kwargs
261+ path_or_handle ,
262+ columns = columns ,
263+ filesystem = filesystem ,
264+ filters = filters ,
265+ ** kwargs ,
261266 )
262267 result = pa_table .to_pandas (** to_pandas_kwargs )
263268
@@ -335,6 +340,7 @@ def read(
335340 self ,
336341 path ,
337342 columns = None ,
343+ filters = None ,
338344 storage_options : StorageOptions | None = None ,
339345 filesystem = None ,
340346 ** kwargs ,
@@ -375,7 +381,7 @@ def read(
375381
376382 try :
377383 parquet_file = self .api .ParquetFile (path , ** parquet_kwargs )
378- return parquet_file .to_pandas (columns = columns , ** kwargs )
384+ return parquet_file .to_pandas (columns = columns , filters = filters , ** kwargs )
379385 finally :
380386 if handles is not None :
381387 handles .close ()
@@ -487,6 +493,7 @@ def read_parquet(
487493 use_nullable_dtypes : bool | lib .NoDefault = lib .no_default ,
488494 dtype_backend : DtypeBackend | lib .NoDefault = lib .no_default ,
489495 filesystem : Any = None ,
496+ filters : list [tuple ] | list [list [tuple ]] | None = None ,
490497 ** kwargs ,
491498) -> DataFrame :
492499 """
@@ -517,7 +524,6 @@ def read_parquet(
517524 if you wish to use its implementation.
518525 columns : list, default=None
519526 If not None, only these columns will be read from the file.
520-
521527 {storage_options}
522528
523529 .. versionadded:: 1.3.0
@@ -550,6 +556,24 @@ def read_parquet(
550556
551557 .. versionadded:: 2.1.0
552558
559+ filters : List[Tuple] or List[List[Tuple]], default None
560+ To filter out data.
561+ Filter syntax: ``[[(column, op, val), ...], ...]``,
562+ where ``op`` is one of ``[==, =, >, >=, <, <=, !=, in, not in]``.
563+ The innermost tuples are transposed into a set of filters applied
564+ through an `AND` operation.
565+ The outer list combines these sets of filters through an `OR`
566+ operation.
567+ A single list of tuples can also be used, meaning that no `OR`
568+ operation between sets of filters is to be conducted.
569+
570+ Using this argument will NOT result in row-wise filtering of the final
571+ partitions unless ``engine="pyarrow"`` is also specified. For
572+ other engines, filtering is only performed at the partition level, that is,
573+ to prevent the loading of some row-groups and/or files.
574+
575+ .. versionadded:: 2.1.0
576+
553577 **kwargs
554578 Any additional kwargs are passed to the engine.
555579
@@ -632,6 +656,7 @@ def read_parquet(
632656 return impl .read (
633657 path ,
634658 columns = columns ,
659+ filters = filters ,
635660 storage_options = storage_options ,
636661 use_nullable_dtypes = use_nullable_dtypes ,
637662 dtype_backend = dtype_backend ,
0 commit comments