Skip to content

Commit e5975ac

Browse files
committed
ENH: to_excel(autofilter=...) apply Excel autofilter over written range for xlsxwriter/openpyxl; keep engine_kwargs semantics intact
1 parent 930b66d commit e5975ac

File tree

5 files changed

+208
-20
lines changed

5 files changed

+208
-20
lines changed

pandas/core/generic.py

Lines changed: 141 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,6 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None:
773773
"""
774774
labels = ensure_index(labels)
775775
self._mgr.set_axis(axis, labels)
776-
777776
@final
778777
@doc(klass=_shared_doc_kwargs["klass"])
779778
def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self:
@@ -1515,7 +1514,6 @@ def __bool__(self) -> NoReturn:
15151514
f"The truth value of a {type(self).__name__} is ambiguous. "
15161515
"Use a.empty, a.bool(), a.item(), a.any() or a.all()."
15171516
)
1518-
15191517
@final
15201518
def abs(self) -> Self:
15211519
"""
@@ -2180,6 +2178,141 @@ def to_excel(
21802178
freeze_panes: tuple[int, int] | None = None,
21812179
storage_options: StorageOptions | None = None,
21822180
engine_kwargs: dict[str, Any] | None = None,
2181+
autofilter: bool = False,
2182+
) -> None:
2183+
"""
2184+
Write object to an Excel sheet.
2185+
2186+
To write a single object to an Excel .xlsx file it is only necessary
2187+
to specify a target file name.
2188+
2189+
.. code-block:: python
2190+
2191+
df.to_excel("path_to_file.xlsx")
2192+
2193+
To write to different sheets of the same .xlsx file it is necessary to
2194+
create an `ExcelWriter` object with a target file name,
2195+
and specify a sheet in the file to write to.
2196+
2197+
.. code-block:: python
2198+
2199+
with pd.ExcelWriter("path_to_file.xlsx") as writer:
2200+
df1.to_excel(writer, sheet_name="Sheet_name_1")
2201+
df2.to_excel(writer, sheet_name="Sheet_name_2")
2202+
2203+
When using `ExcelWriter`, note that the objects are not written until the
2204+
`ExcelWriter` object is closed.
2205+
2206+
Parameters
2207+
----------
2208+
excel_writer : string, path object or ExcelWriter object
2209+
File path or existing ExcelWriter
2210+
If a string is passed, a new ExcelWriter object is created.
2211+
sheet_name : str, default 'Sheet1'
2212+
Name of sheet which will contain DataFrame.
2213+
na_rep : str, default ''
2214+
Missing data representation
2215+
float_format : str, default None
2216+
Format string for floating point numbers
2217+
columns : sequence, optional
2218+
Columns to write
2219+
header : bool or list of str, default True
2220+
Write out the column names. If a list of strings is given
2221+
it is assumed to be aliases for the column names
2222+
index : bool, default True
2223+
Write row names (index)
2224+
index_label : str or sequence, default None
2225+
Column label for index column(s) if desired. If None is given, and
2226+
`header` and `index` are True, then the index names are used. A
2227+
sequence should be given if the DataFrame uses MultiIndex.
2228+
startrow : int, default 0
2229+
Upper left cell row to dump data frame.
2230+
Per default (0) header is written, too.
2231+
startcol : int, default 0
2232+
Upper left cell column to dump data frame.
2233+
engine : str, optional
2234+
Write engine to use, 'openpyxl' or 'xlsxwriter'.
2235+
Defaults to 'xlsxwriter'.
2236+
merge_cells : bool, default True
2237+
Write MultiIndex and Hierarchical Rows as merged cells.
2238+
The indices corresponding to each row will be combined and
2239+
presented as a single cell.
2240+
inf_rep : str, default 'inf'
2241+
Representation for infinity (there is no native Numpy representation
2242+
for infinity in integer dtypes)
2243+
freeze_panes : tuple of int (length 2), default None
2244+
Bottom-most row and right-most column to freeze. Only applied when
2245+
`freeze_panes` is passed as a tuple of two ints.
2246+
storage_options : dict, optional
2247+
Extra options that make sense for a particular storage connection,
2248+
e.g. host, port, username, password, etc., if using a URL that
2249+
requires authentication.
2250+
engine_kwargs : dict, optional
2251+
Arbitrary keyword arguments passed to excel engine.
2252+
autofilter : bool, default False
2253+
Whether to apply an Excel autofilter to the header row over the written data range.
2254+
2255+
See Also
2256+
--------
2257+
read_excel : Read from an Excel file into a DataFrame.
2258+
ExcelFile : Class for parsing tabular excel files.
2259+
ExcelWriter : Class for writing DataFrame objects into excel sheets.
2260+
2261+
Notes
2262+
-----
2263+
The `engine` keyword is not supported when `excel_writer` is an
2264+
existing `ExcelWriter`.
2265+
2266+
Examples
2267+
--------
2268+
>>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
2269+
>>> df.to_excel("pandas_simple.xlsx")
2270+
>>> df.to_excel("pandas_simple.xlsx", engine="openpyxl")
2271+
"""
2272+
if isinstance(excel_writer, ExcelWriter):
2273+
if engine is not None:
2274+
raise ValueError(
2275+
"engine should not be specified when passing an ExcelWriter"
2276+
)
2277+
engine = excel_writer.engine
2278+
else:
2279+
excel_writer = ExcelWriter(
2280+
excel_writer,
2281+
engine=engine,
2282+
mode=mode,
2283+
if_sheet_exists=if_sheet_exists,
2284+
engine_kwargs=engine_kwargs,
2285+
date_format=date_format,
2286+
datetime_format=datetime_format,
2287+
storage_options=storage_options,
2288+
)
2289+
2290+
formatter = ExcelFormatter(
2291+
self,
2292+
na_rep=na_rep,
2293+
float_format=float_format,
2294+
columns=columns,
2295+
header=header,
2296+
index=index,
2297+
index_label=index_label,
2298+
inf_rep=inf_rep,
2299+
)
2300+
2301+
formatter.write(
2302+
excel_writer,
2303+
sheet_name=sheet_name,
2304+
startrow=startrow,
2305+
startcol=startcol,
2306+
freeze_panes=freeze_panes,
2307+
engine=engine,
2308+
storage_options=storage_options,
2309+
engine_kwargs=engine_kwargs,
2310+
autofilter=autofilter,
2311+
)
2312+
2313+
if not isinstance(excel_writer, ExcelWriter):
2314+
# we need to close the writer if we created it
2315+
excel_writer.close()
21832316
) -> None:
21842317
"""
21852318
Write {klass} to an Excel sheet.
@@ -4851,7 +4984,6 @@ def sort_values(
48514984
ignore_index: bool = ...,
48524985
key: ValueKeyFunc = ...,
48534986
) -> Self: ...
4854-
48554987
@overload
48564988
def sort_values(
48574989
self,
@@ -5627,7 +5759,6 @@ def f(x) -> bool:
56275759
return self.loc(axis=axis)[values]
56285760
else:
56295761
raise TypeError("Must pass either `items`, `like`, or `regex`")
5630-
56315762
@final
56325763
def head(self, n: int = 5) -> Self:
56335764
"""
@@ -6100,8 +6231,7 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
61006231
----------
61016232
other : the object from which to get the attributes that we are going
61026233
to propagate. If ``other`` has an ``input_objs`` attribute, then
6103-
this attribute must contain an iterable of objects, each with an
6104-
``attrs`` attribute.
6234+
this attribute must contain an iterable of objects, each with an ``attrs`` attribute.
61056235
method : str, optional
61066236
A passed method name providing context on where ``__finalize__``
61076237
was called.
@@ -9614,10 +9744,10 @@ def align(
96149744
1 1 2 3 4
96159745
2 6 7 8 9
96169746
>>> other
9617-
A B C D
9618-
2 10 20 30 40
9619-
3 60 70 80 90
9620-
4 600 700 800 900
9747+
A B C D E
9748+
2 10 20 30 40 NaN
9749+
3 60 70 80 90 NaN
9750+
4 600 700 800 900 NaN
96219751
96229752
Align on columns:
96239753
@@ -9706,7 +9836,6 @@ def align(
97069836
left = left.__finalize__(self)
97079837
right = right.__finalize__(other)
97089838
return left, right
9709-
97109839
@final
97119840
def _align_frame(
97129841
self,
@@ -12044,7 +12173,6 @@ def last_valid_index(self) -> Hashable:
1204412173
{see_also}\
1204512174
{examples}
1204612175
"""
12047-
1204812176
_sum_prod_doc = """
1204912177
{desc}
1205012178
@@ -12826,8 +12954,6 @@ def last_valid_index(self) -> Hashable:
1282612954
The required number of valid values to perform the operation. If fewer than
1282712955
``min_count`` non-NA values are present the result will be NA.
1282812956
"""
12829-
12830-
1283112957
def make_doc(name: str, ndim: int) -> str:
1283212958
"""
1283312959
Generate the docstring for a Series/DataFrame reduction.
@@ -13194,4 +13320,4 @@ def make_doc(name: str, ndim: int) -> str:
1319413320
examples=examples,
1319513321
**kwargs,
1319613322
)
13197-
return docstr
13323+
return docstr

pandas/io/excel/_base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,6 +1209,7 @@ def _write_cells(
12091209
startrow: int = 0,
12101210
startcol: int = 0,
12111211
freeze_panes: tuple[int, int] | None = None,
1212+
autofilter: bool = False,
12121213
) -> None:
12131214
"""
12141215
Write given formatted cells into an Excel sheet
@@ -1223,6 +1224,8 @@ def _write_cells(
12231224
startcol : upper left cell column to dump data frame
12241225
freeze_panes: int tuple of length 2
12251226
contains the bottom-most row and right-most column to freeze
1227+
autofilter : bool, default False
1228+
If True, apply an autofilter to the header row over the written data range.
12261229
"""
12271230
raise NotImplementedError
12281231

pandas/io/excel/_openpyxl.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,7 @@ def _write_cells(
449449
startrow: int = 0,
450450
startcol: int = 0,
451451
freeze_panes: tuple[int, int] | None = None,
452+
autofilter: bool = False,
452453
) -> None:
453454
# Write the frame cells using openpyxl.
454455
sheet_name = self._get_sheet_name(sheet_name)
@@ -486,6 +487,11 @@ def _write_cells(
486487
row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
487488
)
488489

490+
min_r = None
491+
min_c = None
492+
max_r = None
493+
max_c = None
494+
489495
for cell in cells:
490496
xcell = wks.cell(
491497
row=startrow + cell.row + 1, column=startcol + cell.col + 1
@@ -506,10 +512,23 @@ def _write_cells(
506512
for k, v in style_kwargs.items():
507513
setattr(xcell, k, v)
508514

515+
abs_row = startrow + cell.row + 1
516+
abs_col = startcol + cell.col + 1
517+
518+
# track bounds (1-based for openpyxl)
519+
if min_r is None or abs_row < min_r:
520+
min_r = abs_row
521+
if min_c is None or abs_col < min_c:
522+
min_c = abs_col
523+
if max_r is None or abs_row > max_r:
524+
max_r = abs_row
525+
if max_c is None or abs_col > max_c:
526+
max_c = abs_col
527+
509528
if cell.mergestart is not None and cell.mergeend is not None:
510529
wks.merge_cells(
511-
start_row=startrow + cell.row + 1,
512-
start_column=startcol + cell.col + 1,
530+
start_row=abs_row,
531+
start_column=abs_col,
513532
end_column=startcol + cell.mergeend + 1,
514533
end_row=startrow + cell.mergestart + 1,
515534
)
@@ -532,6 +551,17 @@ def _write_cells(
532551
for k, v in style_kwargs.items():
533552
setattr(xcell, k, v)
534553

554+
if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None:
555+
try:
556+
# Convert numeric bounds to Excel-style range e.g. A1:D10
557+
from openpyxl.utils import get_column_letter
558+
559+
start_ref = f"{get_column_letter(min_c)}{min_r}"
560+
end_ref = f"{get_column_letter(max_c)}{max_r}"
561+
wks.auto_filter.ref = f"{start_ref}:{end_ref}"
562+
except Exception:
563+
pass
564+
535565

536566
class OpenpyxlReader(BaseExcelReader["Workbook"]):
537567
@doc(storage_options=_shared_docs["storage_options"])

pandas/io/excel/_xlsxwriter.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ def _write_cells(
245245
startrow: int = 0,
246246
startcol: int = 0,
247247
freeze_panes: tuple[int, int] | None = None,
248+
autofilter: bool = False,
248249
) -> None:
249250
# Write the frame cells using xlsxwriter.
250251
sheet_name = self._get_sheet_name(sheet_name)
@@ -258,6 +259,11 @@ def _write_cells(
258259
if validate_freeze_panes(freeze_panes):
259260
wks.freeze_panes(*(freeze_panes))
260261

262+
min_r = None
263+
min_c = None
264+
max_r = None
265+
max_c = None
266+
261267
for cell in cells:
262268
val, fmt = self._value_with_fmt(cell.val)
263269

@@ -271,14 +277,35 @@ def _write_cells(
271277
style = self.book.add_format(_XlsxStyler.convert(cell.style, fmt))
272278
style_dict[stylekey] = style
273279

280+
abs_row = startrow + cell.row
281+
abs_col = startcol + cell.col
282+
283+
# track bounds
284+
if min_r is None or abs_row < min_r:
285+
min_r = abs_row
286+
if min_c is None or abs_col < min_c:
287+
min_c = abs_col
288+
if max_r is None or abs_row > max_r:
289+
max_r = abs_row
290+
if max_c is None or abs_col > max_c:
291+
max_c = abs_col
292+
274293
if cell.mergestart is not None and cell.mergeend is not None:
275294
wks.merge_range(
276-
startrow + cell.row,
277-
startcol + cell.col,
295+
abs_row,
296+
abs_col,
278297
startrow + cell.mergestart,
279298
startcol + cell.mergeend,
280299
val,
281300
style,
282301
)
283302
else:
284-
wks.write(startrow + cell.row, startcol + cell.col, val, style)
303+
wks.write(abs_row, abs_col, val, style)
304+
305+
if autofilter and min_r is not None and min_c is not None and max_r is not None and max_c is not None:
306+
# Apply autofilter over the used range. xlsxwriter uses 0-based indices.
307+
try:
308+
wks.autofilter(min_r, min_c, max_r, max_c)
309+
except Exception:
310+
# Be resilient if the engine version doesn't support autofilter or the range is invalid
311+
pass

pandas/io/formats/excel.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,7 @@ def write(
884884
engine: str | None = None,
885885
storage_options: StorageOptions | None = None,
886886
engine_kwargs: dict | None = None,
887+
autofilter: bool = False,
887888
) -> None:
888889
"""
889890
writer : path-like, file-like, or ExcelWriter object
@@ -938,6 +939,7 @@ def write(
938939
startrow=startrow,
939940
startcol=startcol,
940941
freeze_panes=freeze_panes,
942+
autofilter=autofilter,
941943
)
942944
finally:
943945
# make sure to close opened file handles

0 commit comments

Comments
 (0)