From f9970170c0b900bdbbc7e8132e78cc263340e5b6 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 4 Nov 2025 09:46:54 +0000 Subject: [PATCH 1/2] Type `DataFrame` methods: `pivot_table`, `hist`, `boxplot` --- pandas-stubs/core/frame.pyi | 92 +++++++++++++++++++--- pandas-stubs/core/reshape/pivot.pyi | 54 ++++++------- pandas-stubs/plotting/_core.pyi | 90 +++++++++++++++------- tests/test_plotting.py | 115 +++++++++++++++++++++++++++- 4 files changed, 285 insertions(+), 66 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index a3d6f70ab..89fc4fc0d 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -55,6 +55,7 @@ from pandas.core.indexing import ( _LocIndexer, ) from pandas.core.reshape.pivot import ( + _PivotAggFunc, _PivotTableColumnsTypes, _PivotTableIndexTypes, _PivotTableValuesTypes, @@ -124,6 +125,7 @@ from pandas._typing import ( Level, ListLike, ListLikeExceptSeriesAndStr, + ListLikeHashable, ListLikeU, MaskType, MergeHow, @@ -167,6 +169,7 @@ from pandas._typing import ( from pandas.io.formats.style import Styler from pandas.plotting import PlotAccessor +from pandas.plotting._core import _BoxPlotT _T_MUTABLE_MAPPING_co = TypeVar( "_T_MUTABLE_MAPPING_co", bound=MutableMapping, covariant=True @@ -1361,10 +1364,12 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): ) -> Self: ... 
def pivot_table( self, - values: _PivotTableValuesTypes = ..., - index: _PivotTableIndexTypes = ..., - columns: _PivotTableColumnsTypes = ..., - aggfunc="mean", + values: _PivotTableValuesTypes = None, + index: _PivotTableIndexTypes = None, + columns: _PivotTableColumnsTypes = None, + aggfunc: ( + _PivotAggFunc | Sequence[_PivotAggFunc] | Mapping[Hashable, _PivotAggFunc] + ) = "mean", fill_value: Scalar | None = None, margins: _bool = False, dropna: _bool = True, @@ -1696,8 +1701,9 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): def plot(self) -> PlotAccessor: ... def hist( self, - column: _str | list[_str] | None = None, by: _str | ListLike | None = None, + bins: int | list = 10, + *, grid: _bool = True, xlabelsize: float | str | None = None, xrot: float | None = None, @@ -1708,24 +1714,88 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): sharey: _bool = False, figsize: tuple[float, float] | None = None, layout: tuple[int, int] | None = None, - bins: int | list = 10, backend: _str | None = None, + legend: bool = False, **kwargs: Any, - ): ... + ) -> npt.NDArray[np.object_]: ... + + # Keep in sync with `pd.plotting.boxplot` + @overload def boxplot( self, - column: _str | list[_str] | None = None, - by: _str | ListLike | None = None, + by: None = None, ax: PlotAxes | None = None, fontsize: float | _str | None = None, rot: float = 0, grid: _bool = True, figsize: tuple[float, float] | None = None, layout: tuple[int, int] | None = None, - return_type: Literal["axes", "dict", "both"] | None = None, + *, + return_type: Literal["axes"] | None = None, backend: _str | None = None, **kwargs: Any, - ): ... + ) -> PlotAxes: ... 
+ @overload + def boxplot( + self, + by: None = None, + ax: PlotAxes | None = None, + fontsize: float | _str | None = None, + rot: float = 0, + grid: _bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + *, + return_type: Literal["dict"], + backend: _str | None = None, + **kwargs: Any, + ) -> dict[str, PlotAxes]: ... + @overload + def boxplot( + self, + by: None = None, + ax: PlotAxes | None = None, + fontsize: float | _str | None = None, + rot: float = 0, + grid: _bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + *, + return_type: Literal["both"], + backend: _str | None = None, + **kwargs: Any, + ) -> _BoxPlotT: ... + @overload + def boxplot( + self, + by: Hashable | ListLikeHashable, + ax: PlotAxes | None = None, + fontsize: float | _str | None = None, + rot: float = 0, + grid: _bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + *, + return_type: None = None, + backend: _str | None = None, + **kwargs: Any, + ) -> PlotAxes: ... + @overload + def boxplot( + self, + by: Hashable | ListLikeHashable, + ax: PlotAxes | None = None, + fontsize: float | _str | None = None, + rot: float = 0, + grid: _bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + *, + return_type: Literal["axes", "dict", "both"], + backend: _str | None = None, + **kwargs: Any, + ) -> Series: ... + sparse = ... 
# The rest of these are remnants from the diff --git a/pandas-stubs/core/reshape/pivot.pyi b/pandas-stubs/core/reshape/pivot.pyi index 8e8382798..85c1ebfb8 100644 --- a/pandas-stubs/core/reshape/pivot.pyi +++ b/pandas-stubs/core/reshape/pivot.pyi @@ -64,54 +64,54 @@ _ExtendedAnyArrayLike: TypeAlias = AnyArrayLike | ArrayLike @overload def pivot_table( data: DataFrame, - values: _PivotTableValuesTypes = ..., - index: _PivotTableIndexTypes = ..., - columns: _PivotTableColumnsTypes = ..., + values: _PivotTableValuesTypes = None, + index: _PivotTableIndexTypes = None, + columns: _PivotTableColumnsTypes = None, aggfunc: ( _PivotAggFunc | Sequence[_PivotAggFunc] | Mapping[Hashable, _PivotAggFunc] ) = ..., - fill_value: Scalar | None = ..., - margins: bool = ..., - dropna: bool = ..., - margins_name: str = ..., - observed: bool = ..., - sort: bool = ..., + fill_value: Scalar | None = None, + margins: bool = False, + dropna: bool = True, + margins_name: Hashable = "all", + observed: bool = True, + sort: bool = True, ) -> DataFrame: ... # Can only use Index or ndarray when index or columns is a Grouper @overload def pivot_table( data: DataFrame, - values: _PivotTableValuesTypes = ..., + values: _PivotTableValuesTypes = None, *, index: Grouper, - columns: _PivotTableColumnsTypes | Index | npt.NDArray = ..., + columns: _PivotTableColumnsTypes | Index | npt.NDArray = None, aggfunc: ( _PivotAggFunc | Sequence[_PivotAggFunc] | Mapping[Hashable, _PivotAggFunc] - ) = ..., - fill_value: Scalar | None = ..., - margins: bool = ..., - dropna: bool = ..., - margins_name: str = ..., - observed: bool = ..., - sort: bool = ..., + ) = "mean", + fill_value: Scalar | None = None, + margins: bool = False, + dropna: bool = True, + margins_name: Hashable = "all", + observed: bool = True, + sort: bool = True, ) -> DataFrame: ... 
@overload def pivot_table( data: DataFrame, - values: _PivotTableValuesTypes = ..., - index: _PivotTableIndexTypes | Index | npt.NDArray = ..., + values: _PivotTableValuesTypes = None, + index: _PivotTableIndexTypes | Index | npt.NDArray = None, *, columns: Grouper, aggfunc: ( _PivotAggFunc | Sequence[_PivotAggFunc] | Mapping[Hashable, _PivotAggFunc] - ) = ..., - fill_value: Scalar | None = ..., - margins: bool = ..., - dropna: bool = ..., - margins_name: str = ..., - observed: bool = ..., - sort: bool = ..., + ) = "mean", + fill_value: Scalar | None = None, + margins: bool = False, + dropna: bool = True, + margins_name: Hashable = "all", + observed: bool = True, + sort: bool = True, ) -> DataFrame: ... def pivot( data: DataFrame, diff --git a/pandas-stubs/plotting/_core.pyi b/pandas-stubs/plotting/_core.pyi index 1b610d958..f90e8f098 100644 --- a/pandas-stubs/plotting/_core.pyi +++ b/pandas-stubs/plotting/_core.pyi @@ -27,6 +27,7 @@ from pandas._typing import ( HashableT1, HashableT2, HashableT3, + ListLikeHashable, npt, ) @@ -39,50 +40,87 @@ _SingleColor: TypeAlias = ( ) _PlotAccessorColor: TypeAlias = str | list[_SingleColor] | dict[HashableT, _SingleColor] +# Keep in sync with `DataFrame.boxplot` @overload def boxplot( data: DataFrame, - column: Hashable | list[HashableT1] | None = ..., - by: Hashable | list[HashableT2] | None = ..., - ax: Axes | None = ..., - fontsize: float | str | None = ..., - rot: float = ..., - grid: bool = ..., - figsize: tuple[float, float] | None = ..., - layout: tuple[int, int] | None = ..., - return_type: Literal["axes"] | None = ..., + column: Hashable | ListLikeHashable, + by: None = None, + ax: Axes | None = None, + fontsize: float | str | None = None, + rot: float = 0, + grid: bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + *, + return_type: Literal["axes"] | None = None, + backend: str | None = None, **kwargs: Any, ) -> Axes: ... 
@overload def boxplot( data: DataFrame, - column: Hashable | list[HashableT1] | None = ..., - by: Hashable | list[HashableT2] | None = ..., - ax: Axes | None = ..., - fontsize: float | str | None = ..., - rot: float = ..., - grid: bool = ..., - figsize: tuple[float, float] | None = ..., - layout: tuple[int, int] | None = ..., + column: Hashable | ListLikeHashable, + by: None = None, + ax: Axes | None = None, + fontsize: float | str | None = None, + rot: float = 0, + grid: bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, *, return_type: Literal["dict"], + backend: str | None = None, **kwargs: Any, -) -> dict[str, list[Line2D]]: ... +) -> dict[str, Axes]: ... @overload def boxplot( data: DataFrame, - column: Hashable | list[HashableT1] | None = ..., - by: Hashable | list[HashableT2] | None = ..., - ax: Axes | None = ..., - fontsize: float | str | None = ..., - rot: float = ..., - grid: bool = ..., - figsize: tuple[float, float] | None = ..., - layout: tuple[int, int] | None = ..., + column: Hashable | ListLikeHashable, + by: None = None, + ax: Axes | None = None, + fontsize: float | str | None = None, + rot: float = 0, + grid: bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, *, return_type: Literal["both"], + backend: str | None = None, **kwargs: Any, ) -> _BoxPlotT: ... +@overload +def boxplot( + data: DataFrame, + column: Hashable | ListLikeHashable, + by: Hashable | ListLikeHashable, + ax: Axes | None = None, + fontsize: float | str | None = None, + rot: float = 0, + grid: bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + *, + return_type: None = None, + backend: str | None = None, + **kwargs: Any, +) -> Axes: ... 
+@overload +def boxplot( + data: DataFrame, + column: Hashable | ListLikeHashable, + by: Hashable | ListLikeHashable, + ax: Axes | None = None, + fontsize: float | str | None = None, + rot: float = 0, + grid: bool = True, + figsize: tuple[float, float] | None = None, + layout: tuple[int, int] | None = None, + *, + return_type: Literal["axes", "dict", "both"], + backend: str | None = None, + **kwargs: Any, +) -> Series: ... class PlotAccessor: def __init__(self, data: Series | DataFrame) -> None: ... diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 20a4ab153..b9309e2e0 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -1,6 +1,9 @@ import io import itertools -from typing import Any +from typing import ( + TYPE_CHECKING, + Any, +) from matplotlib.axes import Axes from matplotlib.figure import Figure @@ -23,6 +26,9 @@ register_matplotlib_converters, ) +if TYPE_CHECKING: + from pandas.plotting._core import _BoxPlotT # noqa: F401 + @pytest.fixture(autouse=True) def autouse_mpl_cleanup(mpl_cleanup: None) -> None: @@ -204,13 +210,118 @@ def test_bootstrap_plot(close_figures: None) -> None: check(assert_type(pd.plotting.bootstrap_plot(s), Figure), Figure) +def test_hist(close_figures: None) -> None: + df = pd.DataFrame({"Col1": [1, 2], "Col4": [2, 1]}) + check(assert_type(df.hist(), npt.NDArray[np.object_]), npt.NDArray[np.object_]) + check( + assert_type(df.hist(by="Col4"), npt.NDArray[np.object_]), + npt.NDArray[np.object_], + ) + + def test_boxplot(close_figures: None) -> None: np.random.seed(1234) df = pd.DataFrame(np.random.randn(10, 4), columns=["Col1", "Col2", "Col3", "Col4"]) + + # Basic usage + check(assert_type(pd.plotting.boxplot(df, column=["Col1"]), Axes), Axes) + check(assert_type(df.boxplot(column=["Col1"]), Axes), Axes) + + # Return type: axes (passed explicitly) check( - assert_type(pd.plotting.boxplot(df, column=["Col1", "Col2", "Col3"]), Axes), + assert_type(pd.plotting.boxplot(df, column=["Col1"], return_type="axes"), Axes), 
Axes, ) + check( + assert_type(df.boxplot(column=["Col1"], return_type="axes"), Axes), + Axes, + ) + + # Return type: dict + check( + assert_type( + pd.plotting.boxplot(df, column=["Col1"], return_type="dict"), + dict[str, Axes], + ), + dict, + ) + check( + assert_type( + df.boxplot(column=["Col1"], return_type="dict"), + dict[str, Axes], + ), + dict, + ) + + # Return type: both + check( + assert_type( + pd.plotting.boxplot(df, column=["Col1"], return_type="both"), "_BoxPlotT" + ), + tuple, + ) + check( + assert_type(df.boxplot(column=["Col1"], return_type="both"), "_BoxPlotT"), + tuple, + ) + + # Basic by= usage + check( + assert_type(pd.plotting.boxplot(df, column=["Col1"], by="Col4"), Axes), + Axes, + ) + check( + assert_type(df.boxplot(column=["Col1"], by="Col4"), Axes), + Axes, + ) + + # Return type: axes with by= + check( + assert_type( + pd.plotting.boxplot(df, column=["Col1"], return_type="axes", by="Col4"), + Series, + ), + Series, + ) + check( + assert_type( + df.boxplot(column=["Col1"], return_type="axes", by="Col4"), + Series, + ), + Series, + ) + + # Return type: dict with by= + check( + assert_type( + pd.plotting.boxplot(df, column=["Col1"], return_type="dict", by="Col4"), + Series, + ), + Series, + ) + check( + assert_type( + df.boxplot(column=["Col1"], return_type="dict", by="Col4"), + Series, + ), + Series, + ) + + # Return type: both with by= + check( + assert_type( + pd.plotting.boxplot(df, column=["Col1"], return_type="both", by="Col4"), + Series, + ), + Series, + ) + check( + assert_type( + df.boxplot(column=["Col1"], return_type="both", by="Col4"), + Series, + ), + Series, + ) def test_reg_dereg(close_figures: None) -> None: From f6a00e9e3cc535c6131173b006109b10bacb535a Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 6 Nov 2025 15:55:43 +0000 Subject: [PATCH 2/2] add 'mean' default, correct 'All' default, specify `npt.NDArray[Any]` --- pandas-stubs/core/reshape/pivot.pyi | 13 +++++++------ 
tests/test_plotting.py | 5 +---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas-stubs/core/reshape/pivot.pyi b/pandas-stubs/core/reshape/pivot.pyi index 85c1ebfb8..f328e1588 100644 --- a/pandas-stubs/core/reshape/pivot.pyi +++ b/pandas-stubs/core/reshape/pivot.pyi @@ -6,6 +6,7 @@ from collections.abc import ( ) import datetime from typing import ( + Any, Literal, TypeAlias, overload, @@ -69,11 +70,11 @@ def pivot_table( columns: _PivotTableColumnsTypes = None, aggfunc: ( _PivotAggFunc | Sequence[_PivotAggFunc] | Mapping[Hashable, _PivotAggFunc] - ) = ..., + ) = "mean", fill_value: Scalar | None = None, margins: bool = False, dropna: bool = True, - margins_name: Hashable = "all", + margins_name: Hashable = "All", observed: bool = True, sort: bool = True, ) -> DataFrame: ... @@ -85,14 +86,14 @@ def pivot_table( values: _PivotTableValuesTypes = None, *, index: Grouper, - columns: _PivotTableColumnsTypes | Index | npt.NDArray = None, + columns: _PivotTableColumnsTypes | npt.NDArray[Any] | Index = None, aggfunc: ( _PivotAggFunc | Sequence[_PivotAggFunc] | Mapping[Hashable, _PivotAggFunc] ) = "mean", fill_value: Scalar | None = None, margins: bool = False, dropna: bool = True, - margins_name: Hashable = "all", + margins_name: Hashable = "All", observed: bool = True, sort: bool = True, ) -> DataFrame: ... @@ -100,7 +101,7 @@ def pivot_table( def pivot_table( data: DataFrame, values: _PivotTableValuesTypes = None, - index: _PivotTableIndexTypes | Index | npt.NDArray = None, + index: _PivotTableIndexTypes | npt.NDArray[Any] | Index = None, *, columns: Grouper, aggfunc: ( @@ -109,7 +110,7 @@ def pivot_table( fill_value: Scalar | None = None, margins: bool = False, dropna: bool = True, - margins_name: Hashable = "all", + margins_name: Hashable = "All", observed: bool = True, sort: bool = True, ) -> DataFrame: ... 
diff --git a/tests/test_plotting.py b/tests/test_plotting.py index b9309e2e0..8565f4753 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -316,10 +316,7 @@ def test_boxplot(close_figures: None) -> None: Series, ) check( - assert_type( - df.boxplot(column=["Col1"], return_type="both", by="Col4"), - Series, - ), + assert_type(df.boxplot(column=["Col1"], return_type="both", by="Col4"), Series), Series, )