ENH: add an autofilter option to to_excel that applies an Excel autofilter over the written range for the xlsxwriter and openpyxl engines; engine_kwargs semantics are unchanged

antznette1 · antznette1 · commit e5975accce4a · 2025-11-06T01:04:04.000+01:00
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -773,7 +773,6 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None:
         """
         labels = ensure_index(labels)
         self._mgr.set_axis(axis, labels)
-
     @final
     @doc(klass=_shared_doc_kwargs["klass"])
     def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self:
@@ -1515,7 +1514,6 @@ def __bool__(self) -> NoReturn:
             f"The truth value of a {type(self).__name__} is ambiguous. "
             "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
         )
-
     @final
     def abs(self) -> Self:
         """
@@ -2180,6 +2178,141 @@ def to_excel(
         freeze_panes: tuple[int, int] | None = None,
         storage_options: StorageOptions | None = None,
         engine_kwargs: dict[str, Any] | None = None,
+        autofilter: bool = False,
+    ) -> None:
+        """
+        Write object to an Excel sheet.
+
+        To write a single object to an Excel .xlsx file it is only necessary
+        to specify a target file name.
+
+        .. code-block:: python
+
+            df.to_excel("path_to_file.xlsx")
+
+        To write to different sheets of the same .xlsx file it is necessary to
+        create an `ExcelWriter` object with a target file name,
+        and specify a sheet in the file to write to.
+
+        .. code-block:: python
+
+            with pd.ExcelWriter("path_to_file.xlsx") as writer:
+                df1.to_excel(writer, sheet_name="Sheet_name_1")
+                df2.to_excel(writer, sheet_name="Sheet_name_2")
+
+        When using `ExcelWriter`, note that the objects are not written until the
+        `ExcelWriter` object is closed.
+
+        Parameters
+        ----------
+        excel_writer : string, path object or ExcelWriter object
+            File path or existing ExcelWriter
+            If a string is passed, a new ExcelWriter object is created.
+        sheet_name : str, default 'Sheet1'
+            Name of sheet which will contain DataFrame.
+        na_rep : str, default ''
+            Missing data representation
+        float_format : str, default None
+            Format string for floating point numbers
+        columns : sequence, optional
+            Columns to write
+        header : bool or list of str, default True
+            Write out the column names. If a list of string is given
+            it is assumed to be aliases for the column names
+        index : bool, default True
+            Write row names (index)
+        index_label : str or sequence, default None
+            Column label for index column(s) if desired. If None is given, and
+            `header` and `index` are True, then the index names are used. A
+            sequence should be given if the DataFrame uses MultiIndex.
+        startrow : int, default 0
+            Upper left cell row to dump data frame.
+            Per default (0) header is written, too.
+        startcol : int, default 0
+            Upper left cell column to dump data frame.
+        engine : str, optional
+            Write engine to use, 'openpyxl' or 'xlsxwriter'.
+            Defaults to 'xlsxwriter'.
+        merge_cells : bool, default True
+            Write MultiIndex and Hierarchical Rows as merged cells.
+            The indices corresponding to each row will be combined and
+            presented as a single cell.
+        inf_rep : str, default 'inf'
+            Representation for infinity (there is no native Numpy representation
+            for infinity in integer dtypes)
+        freeze_panes : tuple of int (length 2), default None
+            First rows to freeze panes on. Only applicable when `freeze_panes`
+            is passed as a tuple.
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection,
+            e.g. host, port, username, password, etc., if using a URL that
+            requires authentication.
+        engine_kwargs : dict, optional
+            Arbitrary keyword arguments passed to excel engine.
+        autofilter : bool, default False
+            If True, add an Excel autofilter over the full range of written cells.
+
+        See Also
+        --------
+        read_excel : Read from an Excel file into a DataFrame.
+        ExcelFile : Class for parsing tabular excel files.
+        ExcelWriter : Class for writing DataFrame objects into excel sheets.
+
+        Notes
+        -----
+        The `engine` keyword is not supported when `excel_writer` is an
+        existing `ExcelWriter`.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+        >>> df.to_excel("pandas_simple.xlsx")
+        >>> df.to_excel("pandas_simple.xlsx", engine="openpyxl")
+        """
+        if isinstance(excel_writer, ExcelWriter):
+            if engine is not None:
+                raise ValueError(
+                    "engine should not be specified when passing an ExcelWriter"
+                )
+            engine = excel_writer.engine
+        else:
+            excel_writer = ExcelWriter(
+                excel_writer,
+                engine=engine,
+                mode=mode,
+                if_sheet_exists=if_sheet_exists,
+                engine_kwargs=engine_kwargs,
+                date_format=date_format,
+                datetime_format=datetime_format,
+                storage_options=storage_options,
+            )
+
+        formatter = ExcelFormatter(
+            self,
+            na_rep=na_rep,
+            float_format=float_format,
+            columns=columns,
+            header=header,
+            index=index,
+            index_label=index_label,
+            inf_rep=inf_rep,
+        )
+
+        formatter.write(
+            excel_writer,
+            sheet_name=sheet_name,
+            startrow=startrow,
+            startcol=startcol,
+            freeze_panes=freeze_panes,
+            engine=engine,
+            storage_options=storage_options,
+            engine_kwargs=engine_kwargs,
+            autofilter=autofilter,
+        )
+
+        if not isinstance(excel_writer, ExcelWriter):
+            # we need to close the writer if we created it
+            excel_writer.close()
     ) -> None:
         """
         Write {klass} to an Excel sheet.
@@ -4851,7 +4984,6 @@ def sort_values(
         ignore_index: bool = ...,
         key: ValueKeyFunc = ...,
     ) -> Self: ...
-
     @overload
     def sort_values(
         self,
@@ -5627,7 +5759,6 @@ def f(x) -> bool:
             return self.loc(axis=axis)[values]
         else:
             raise TypeError("Must pass either `items`, `like`, or `regex`")
-
     @final
     def head(self, n: int = 5) -> Self:
         """
@@ -6100,8 +6231,7 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
         ----------
         other : the object from which to get the attributes that we are going
             to propagate. If ``other`` has an ``input_objs`` attribute, then
-            this attribute must contain an iterable of objects, each with an
-            ``attrs`` attribute.
+            this attribute must contain an iterable of objects, each with an ``attrs`` attribute.
         method : str, optional
             A passed method name providing context on where ``__finalize__``
             was called.
@@ -9614,10 +9744,10 @@ def align(
         1  1  2  3  4
         2  6  7  8  9
         >>> other
-            A    B    C    D
-        2   10   20   30   40
-        3   60   70   80   90
-        4  600  700  800  900
+            A    B    C    D   E
+        2   10   20   30   40 NaN
+        3   60   70   80   90 NaN
+        4  600  700  800  900 NaN
 
         Align on columns:
 
@@ -9706,7 +9836,6 @@ def align(
         left = left.__finalize__(self)
         right = right.__finalize__(other)
         return left, right
-
     @final
     def _align_frame(
         self,
@@ -12044,7 +12173,6 @@ def last_valid_index(self) -> Hashable:
 {see_also}\
 {examples}
 """
-
 _sum_prod_doc = """
 {desc}
 
@@ -12826,8 +12954,6 @@ def last_valid_index(self) -> Hashable:
     The required number of valid values to perform the operation. If fewer than
     ``min_count`` non-NA values are present the result will be NA.
 """
-
-
 def make_doc(name: str, ndim: int) -> str:
     """
     Generate the docstring for a Series/DataFrame reduction.
@@ -13194,4 +13320,4 @@ def make_doc(name: str, ndim: int) -> str:
         examples=examples,
         **kwargs,
     )
-    return docstr
+    return docstr
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -1209,6 +1209,7 @@ def _write_cells(
         startrow: int = 0,
         startcol: int = 0,
         freeze_panes: tuple[int, int] | None = None,
+        autofilter: bool = False,
     ) -> None:
         """
         Write given formatted cells into Excel an excel sheet
@@ -1223,6 +1224,8 @@ def _write_cells(
         startcol : upper left cell column to dump data frame
         freeze_panes: int tuple of length 2
             contains the bottom-most row and right-most column to freeze
+        autofilter : bool, default False
+            If True, apply an autofilter to the header row over the written data range.
         """
         raise NotImplementedError
 
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
@@ -449,6 +449,7 @@ def _write_cells(
         startrow: int = 0,
         startcol: int = 0,
         freeze_panes: tuple[int, int] | None = None,
+        autofilter: bool = False,
     ) -> None:
         # Write the frame cells using openpyxl.
         sheet_name = self._get_sheet_name(sheet_name)
@@ -486,6 +487,11 @@ def _write_cells(
                 row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
             )
 
+        min_r = None
+        min_c = None
+        max_r = None
+        max_c = None
+
         for cell in cells:
             xcell = wks.cell(
                 row=startrow + cell.row + 1, column=startcol + cell.col + 1
@@ -506,10 +512,23 @@ def _write_cells(
                 for k, v in style_kwargs.items():
                     setattr(xcell, k, v)
 
+            abs_row = startrow + cell.row + 1
+            abs_col = startcol + cell.col + 1
+
+            # track bounds (1-based for openpyxl)
+            if min_r is None or abs_row < min_r:
+                min_r = abs_row
+            if min_c is None or abs_col < min_c:
+                min_c = abs_col
+            if max_r is None or abs_row > max_r:
+                max_r = abs_row
+            if max_c is None or abs_col > max_c:
+                max_c = abs_col
+
             if cell.mergestart is not None and cell.mergeend is not None:
                 wks.merge_cells(
-                    start_row=startrow + cell.row + 1,
-                    start_column=startcol + cell.col + 1,
+                    start_row=abs_row,
+                    start_column=abs_col,
                     end_column=startcol + cell.mergeend + 1,
                     end_row=startrow + cell.mergestart + 1,
                 )
@@ -532,6 +551,17 @@ def _write_cells(
                             for k, v in style_kwargs.items():
                                 setattr(xcell, k, v)
 
+        if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None:
+            try:
+                # Convert numeric bounds to Excel-style range e.g. A1:D10
+                from openpyxl.utils import get_column_letter
+
+                start_ref = f"{get_column_letter(min_c)}{min_r}"
+                end_ref = f"{get_column_letter(max_c)}{max_r}"
+                wks.auto_filter.ref = f"{start_ref}:{end_ref}"
+            except Exception:
+                pass
+
 
 class OpenpyxlReader(BaseExcelReader["Workbook"]):
     @doc(storage_options=_shared_docs["storage_options"])
diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py
@@ -245,6 +245,7 @@ def _write_cells(
         startrow: int = 0,
         startcol: int = 0,
         freeze_panes: tuple[int, int] | None = None,
+        autofilter: bool = False,
     ) -> None:
         # Write the frame cells using xlsxwriter.
         sheet_name = self._get_sheet_name(sheet_name)
@@ -258,6 +259,11 @@ def _write_cells(
         if validate_freeze_panes(freeze_panes):
             wks.freeze_panes(*(freeze_panes))
 
+        min_r = None
+        min_c = None
+        max_r = None
+        max_c = None
+
         for cell in cells:
             val, fmt = self._value_with_fmt(cell.val)
 
@@ -271,14 +277,35 @@ def _write_cells(
                 style = self.book.add_format(_XlsxStyler.convert(cell.style, fmt))
                 style_dict[stylekey] = style
 
+            abs_row = startrow + cell.row
+            abs_col = startcol + cell.col
+
+            # track bounds
+            if min_r is None or abs_row < min_r:
+                min_r = abs_row
+            if min_c is None or abs_col < min_c:
+                min_c = abs_col
+            if max_r is None or abs_row > max_r:
+                max_r = abs_row
+            if max_c is None or abs_col > max_c:
+                max_c = abs_col
+
             if cell.mergestart is not None and cell.mergeend is not None:
                 wks.merge_range(
-                    startrow + cell.row,
-                    startcol + cell.col,
+                    abs_row,
+                    abs_col,
                     startrow + cell.mergestart,
                     startcol + cell.mergeend,
                     val,
                     style,
                 )
             else:
-                wks.write(startrow + cell.row, startcol + cell.col, val, style)
+                wks.write(abs_row, abs_col, val, style)
+
+        if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None:
+            # Apply autofilter over the used range. xlsxwriter uses 0-based indices.
+            try:
+                wks.autofilter(min_r, min_c, max_r, max_c)
+            except Exception:
+                # Best effort: engine may lack autofilter or the range may be invalid
+                pass
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
@@ -884,6 +884,7 @@ def write(
         engine: str | None = None,
         storage_options: StorageOptions | None = None,
         engine_kwargs: dict | None = None,
+        autofilter: bool = False,
     ) -> None:
         """
         writer : path-like, file-like, or ExcelWriter object
@@ -938,6 +939,7 @@ def write(
                 startrow=startrow,
                 startcol=startcol,
                 freeze_panes=freeze_panes,
+                autofilter=autofilter,
             )
         finally:
             # make sure to close opened file handles