diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 65982ecdb810c..8f96681abf2f4 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -232,7 +232,7 @@ Other enhancements - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) -- +- :func:`qcut` now accepts ``right`` as an optional argument, as in :func:`cut` (:issue:`62938`) .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index b13da83084e5c..fc3870f2e235f 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -294,6 +294,7 @@ def qcut( x, q, labels=None, + right: bool = True, retbins: bool = False, precision: int = 3, duplicates: str = "raise", @@ -316,6 +317,11 @@ def qcut( Used as labels for the resulting bins. Must be of the same length as the resulting bins. If False, return only integer indicators of the bins. If True, raises an error. + right : bool, default True + Indicates whether each quantile bin includes its rightmost edge + or not. If ``right == True`` (the default), then quantile edges + ``[1, 2, 3, 4]`` indicate the bins (1,2], (2,3], (3,4]. Has the + same meaning as the ``right`` argument of :func:`cut`. retbins : bool, optional Whether to return the (bins, labels) or not. Can be useful if bins is given as a scalar. 
@@ -378,6 +384,7 @@ def qcut( x_idx, Index(bins), labels=labels, + right=right, precision=precision, include_lowest=True, duplicates=duplicates, diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index b6d45aeab8a7b..368d71f784e28 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -44,6 +44,40 @@ def test_qcut(): tm.assert_categorical_equal(labels, ex_levels) +def test_qcut_right(): + arr = np.random.default_rng(2).standard_normal(1000) + + # The bins are stored as an Index that has been rounded, so the + # edges are compared with a loose tolerance rather than exactly. + labels, _ = qcut(arr, 4, retbins=True, right=True) + ex_bins = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + result = labels.categories.left.values + assert np.allclose(result, ex_bins[:-1], atol=1e-2) + + result = labels.categories.right.values + assert np.allclose(result, ex_bins[1:], atol=1e-2) + + ex_levels = cut(arr, ex_bins, include_lowest=True, right=True) + tm.assert_categorical_equal(labels, ex_levels) + + +def test_qcut_no_right(): + arr = np.random.default_rng(2).standard_normal(1000) + + labels, _ = qcut(arr, 4, retbins=True, right=False) + ex_bins = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + lefts = labels.categories.left.values + assert np.allclose(lefts, ex_bins[:-1], atol=1e-2) + + rights = labels.categories.right.values + assert np.allclose(rights, ex_bins[1:], atol=1e-2) + + ex_levels = cut(arr, ex_bins, include_lowest=True, right=False) + tm.assert_categorical_equal(labels, ex_levels) + + def test_qcut_bounds(): arr = np.random.default_rng(2).standard_normal(1000)