From 05f34910efa432f8580b5dcd687020b8f6033440 Mon Sep 17 00:00:00 2001 From: Sanjana Moudgalya <36978563+sanjanam1998@users.noreply.github.com> Date: Wed, 1 Oct 2025 14:46:06 +0000 Subject: [PATCH 1/9] API/BUG: freq retention in value_counts --- pandas/core/algorithms.py | 30 +++++ pandas/tests/base/test_value_counts.py | 150 +++++++++++++++++++++++++ 2 files changed, 180 insertions(+) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index bbca78459ca75..57f24abac0acd 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -937,6 +937,36 @@ def value_counts_internal( if normalize: result = result / counts.sum() + # freq patching for DatetimeIndex, TimedeltaIndex + try: + from pandas import ( + DatetimeIndex, + TimedeltaIndex, + ) + + if ( + bins is None + and not sort + and isinstance(values, (DatetimeIndex, TimedeltaIndex)) + and values.freq is not None + and isinstance(result.index, (DatetimeIndex, TimedeltaIndex)) + and len(result.index) == len(values) + and result.index.equals(values) + ): + base_freq = values.freq + # Rebuild the index with the original freq; name preserved. 
+ if isinstance(result.index, DatetimeIndex): + result.index = DatetimeIndex( + result.index._data, freq=base_freq, name=result.index.name + ) + else: # TimedeltaIndex + result.index = TimedeltaIndex( + result.index._data, freq=base_freq, name=result.index.name + ) + except Exception: + # If freq patching fails, does not affect value_counts + pass + return result diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index bcb31829a201f..5e102c15f04f9 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -339,3 +339,153 @@ def test_value_counts_object_inference_deprecated(): exp = dti.value_counts() exp.index = exp.index.astype(object) tm.assert_series_equal(res, exp) + + +def _vc_make_index(kind: str, periods=5, freq="D"): + if kind == "dt": + return pd.date_range("2016-01-01", periods=periods, freq=freq) + if kind == "td": + return pd.timedelta_range(Timedelta(0), periods=periods, freq=freq) + raise ValueError("kind must be 'dt' or 'td'") + + +@pytest.mark.parametrize( + "kind,freq,normalize", + [ + ("dt", "D", False), + ("dt", "D", True), + ("td", "D", False), + ("td", "D", True), + ("td", Timedelta(hours=1), False), + ("td", Timedelta(hours=1), True), + ], +) +def test_value_counts_freq_preserved_datetimelike_no_sort(kind, freq, normalize): + idx = _vc_make_index(kind, periods=5, freq=freq) + vc = idx.value_counts(sort=False, normalize=normalize) + assert vc.index.freq == idx.freq + if normalize: + assert np.isclose(vc.values, 1 / len(idx)).all() + + +@pytest.mark.parametrize( + "kind,freq", + [ + ("dt", "D"), + ("td", "D"), + ("td", Timedelta(hours=1)), + ], +) +def test_value_counts_freq_drops_datetimelike_when_sorted(kind, freq): + idx = _vc_make_index(kind, periods=5, freq=freq) + vc = idx.value_counts() # default sort=True (reorders) + assert vc.index.freq is None + + +@pytest.mark.parametrize( + "kind,freq", + [ + ("dt", "D"), + ("td", "D"), + ("td", Timedelta(hours=1)), + 
], +) +def test_value_counts_freq_drops_datetimelike_with_duplicates(kind, freq): + base = _vc_make_index(kind, periods=5, freq=freq) + obj = base.insert(1, base[1]) # duplicate one label + vc = obj.value_counts(sort=False) + assert vc.index.freq is None + + +@pytest.mark.parametrize( + "kind,freq", + [ + ("dt", "D"), + ("td", "D"), + ("td", Timedelta(hours=1)), + ], +) +def test_value_counts_freq_drops_datetimelike_with_gap(kind, freq): + base = _vc_make_index(kind, periods=5, freq=freq) + obj = base.delete(2) # remove one step to break contiguity + vc = obj.value_counts(sort=False) + assert vc.index.freq is None + + +@pytest.mark.parametrize( + "kind,freq,dropna,expect_hasnans", + [ + ("dt", "D", False, True), # keep NaT + ("dt", "D", True, False), # drop NaT + ("td", "D", False, True), + ("td", "D", True, False), + ("td", Timedelta(hours=1), False, True), + ("td", Timedelta(hours=1), True, False), + ], +) +def test_value_counts_freq_drops_datetimelike_with_nat( + kind, freq, dropna, expect_hasnans +): + base = _vc_make_index(kind, periods=3, freq=freq) + obj = base.insert(1, pd.NaT) + vc = obj.value_counts(dropna=dropna, sort=False) + assert vc.index.freq is None + assert vc.index.hasnans is expect_hasnans + + +@pytest.mark.parametrize( + "freq,start,periods,sort", + [ + ("D", "2016-01-01", 5, False), + ("D", "2016-01-01", 5, True), + ("M", "2016-01", 6, False), # MonthEnd + ("M", "2016-01", 6, True), + ("Q-DEC", "2016Q1", 4, False), # QuarterEnd (Dec anchored) + ("Q-DEC", "2016Q1", 4, True), + ("Y-DEC", "2014", 3, False), # YearEnd (Dec anchored) + ("Y-DEC", "2014", 3, True), + ], +) +def test_value_counts_period_freq_preserved_sort_and_nosort(freq, start, periods, sort): + pi = pd.period_range(start=start, periods=periods, freq=freq) + vc = pi.value_counts(sort=sort) + assert isinstance(vc.index, pd.PeriodIndex) + assert vc.index.dtype == pi.dtype + assert vc.index.freq == pi.freq + + +def test_value_counts_period_freq_preserved_with_duplicates(): + pi = 
pd.period_range("2016-01", periods=5, freq="M") + obj = pi.insert(1, pi[1]) # duplicate one label + vc = obj.value_counts(sort=False) + assert isinstance(vc.index, pd.PeriodIndex) + assert vc.index.dtype == pi.dtype + assert vc.index.freq == pi.freq + + +def test_value_counts_period_freq_preserved_with_gap(): + pi = pd.period_range("2016-01", periods=5, freq="M") + obj = pi.delete(2) # remove one element + vc = obj.value_counts(sort=False) + assert isinstance(vc.index, pd.PeriodIndex) + assert vc.index.dtype == pi.dtype + assert vc.index.freq == pi.freq + + +def test_value_counts_period_freq_preserved_with_normalize(): + pi = pd.period_range("2016-01", periods=4, freq="M") + vc = pi.value_counts(normalize=True, sort=False) + assert isinstance(vc.index, pd.PeriodIndex) + assert vc.index.dtype == pi.dtype + assert vc.index.freq == pi.freq + assert np.isclose(vc.values, 1 / len(pi)).all() + + +def test_value_counts_period_freq_preserved_with_nat_dropna_true(): + pi = pd.period_range("2016-01", periods=5, freq="M") + obj = pi.insert(1, pd.NaT) + vc = obj.value_counts(dropna=True, sort=False) + assert not vc.index.hasnans + assert isinstance(vc.index, pd.PeriodIndex) + assert vc.index.dtype == pi.dtype + assert vc.index.freq == pi.freq From 163c0f34e0bfcc5d71d9fe7a490fadd2ffb1d88e Mon Sep 17 00:00:00 2001 From: Sanjana Moudgalya <36978563+sanjanam1998@users.noreply.github.com> Date: Wed, 1 Oct 2025 14:57:19 +0000 Subject: [PATCH 2/9] adding whats new --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 91ce855f03b08..dcb8aa7e8ecff 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -968,6 +968,7 @@ Datetimelike - Bug in comparison between objects with pyarrow date dtype and ``timestamp[pyarrow]`` or ``np.datetime64`` dtype failing to consider these as non-comparable (:issue:`62157`) - Bug in constructing arrays with :class:`ArrowDtype` 
with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`) - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`) +- Bug in :meth:`Index.value_counts` failing to retain ``freq`` for :class:`DatetimeIndex` and :class:`TimedeltaIndex` (:issue:`33830`) - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) Timedelta From 74ad212e80a50918ced82a727cf4cb98d6fd8f48 Mon Sep 17 00:00:00 2001 From: Sanjana Moudgalya <36978563+sanjanam1998@users.noreply.github.com> Date: Wed, 15 Oct 2025 18:02:29 +0000 Subject: [PATCH 3/9] preserving freq without patching --- pandas/core/algorithms.py | 59 +++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 57f24abac0acd..f709ec3535deb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -867,6 +867,26 @@ def value_counts_internal( Series, ) + def _preserve_freq(original_values, result_index): + freq = getattr(original_values, "freq", None) + + if ( + freq is not None + and type(original_values) is type(result_index) + and len(result_index) == len(original_values) + and result_index.equals(original_values) + ): + try: + # Rebuild index with freq using the same constructor + return type(result_index)( + result_index._data, freq=freq, name=result_index.name + ) + except (TypeError, ValueError): + # If reconstruction fails, return original index + pass + + return result_index + index_name = getattr(values, "name", None) name = "proportion" if normalize else "count" @@ -929,6 +949,15 @@ def value_counts_internal( # Starting in 3.0, we no longer perform dtype inference on the # Index object
we construct here, xref GH#56161 idx = Index(keys, dtype=keys.dtype, name=index_name) + + if ( + bins is None + and not sort + and hasattr(values, "freq") + and values.freq is not None + ): + idx = _preserve_freq(values, idx) + result = Series(counts, index=idx, name=name, copy=False) if sort: @@ -937,36 +966,6 @@ def value_counts_internal( if normalize: result = result / counts.sum() - # freq patching for DatetimeIndex, TimedeltaIndex - try: - from pandas import ( - DatetimeIndex, - TimedeltaIndex, - ) - - if ( - bins is None - and not sort - and isinstance(values, (DatetimeIndex, TimedeltaIndex)) - and values.freq is not None - and isinstance(result.index, (DatetimeIndex, TimedeltaIndex)) - and len(result.index) == len(values) - and result.index.equals(values) - ): - base_freq = values.freq - # Rebuild the index with the original freq; name preserved. - if isinstance(result.index, DatetimeIndex): - result.index = DatetimeIndex( - result.index._data, freq=base_freq, name=result.index.name - ) - else: # TimedeltaIndex - result.index = TimedeltaIndex( - result.index._data, freq=base_freq, name=result.index.name - ) - except Exception: - # If freq patching fails, does not affect value_counts - pass - return result From d8acdffac35053083c89489a9d4411c41d83d7ca Mon Sep 17 00:00:00 2001 From: Sanjana Moudgalya <36978563+sanjanam1998@users.noreply.github.com> Date: Sat, 18 Oct 2025 02:34:00 +0000 Subject: [PATCH 4/9] git fix --- pandas/core/algorithms.py | 44 +++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f709ec3535deb..5f7a6ed2a82a4 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -863,30 +863,12 @@ def value_counts_internal( dropna: bool = True, ) -> Series: from pandas import ( + DatetimeIndex, Index, Series, + TimedeltaIndex, ) - def _preserve_freq(original_values, result_index): - freq = getattr(original_values, "freq", None) - 
- if ( - freq is not None - and type(original_values) is type(result_index) - and len(result_index) == len(original_values) - and result_index.equals(original_values) - ): - try: - # Rebuild index with freq using the same constructor - return type(result_index)( - result_index._data, freq=freq, name=result_index.name - ) - except (TypeError, ValueError): - # If reconstruction fails, return original index - pass - - return result_index - index_name = getattr(values, "name", None) name = "proportion" if normalize else "count" @@ -953,10 +935,26 @@ def _preserve_freq(original_values, result_index): if ( bins is None and not sort - and hasattr(values, "freq") - and values.freq is not None + and isinstance(values, (DatetimeIndex, TimedeltaIndex)) + and values.inferred_freq is not None ): - idx = _preserve_freq(values, idx) + # freq preservation + # Check if the result would be the same as input + if len(idx) == len(values) and idx.equals(values): + # Rebuild idx with the correct type and inferred frequency + if isinstance(values, DatetimeIndex): + idx = DatetimeIndex( + idx._data if hasattr(idx, "_data") else idx.values, + freq=values.inferred_freq, + name=idx.name, + ) + + elif isinstance(values, TimedeltaIndex): + idx = TimedeltaIndex( + idx._data if hasattr(idx, "_data") else idx.values, + freq=values.inferred_freq, + name=idx.name, + ) result = Series(counts, index=idx, name=name, copy=False) From b643a7c106fa78066215d44b03245a25dd25711a Mon Sep 17 00:00:00 2001 From: Sanjana Moudgalya <36978563+sanjanam1998@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:40:48 +0000 Subject: [PATCH 5/9] test changes --- pandas/core/algorithms.py | 20 +-- pandas/tests/base/test_value_counts.py | 200 ++++++++----------------- 2 files changed, 68 insertions(+), 152 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 5f7a6ed2a82a4..f7bc11749957e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -937,24 +937,12 @@ def 
value_counts_internal( and not sort and isinstance(values, (DatetimeIndex, TimedeltaIndex)) and values.inferred_freq is not None + and len(idx) == len(values) + and idx.equals(values) ): # freq preservation - # Check if the result would be the same as input - if len(idx) == len(values) and idx.equals(values): - # Rebuild idx with the correct type and inferred frequency - if isinstance(values, DatetimeIndex): - idx = DatetimeIndex( - idx._data if hasattr(idx, "_data") else idx.values, - freq=values.inferred_freq, - name=idx.name, - ) - - elif isinstance(values, TimedeltaIndex): - idx = TimedeltaIndex( - idx._data if hasattr(idx, "_data") else idx.values, - freq=values.inferred_freq, - name=idx.name, - ) + # Rebuild idx with the correct type and inferred frequency + idx.freq = values.inferred_freq result = Series(counts, index=idx, name=name, copy=False) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 5e102c15f04f9..42e43990134bc 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -341,151 +341,79 @@ def test_value_counts_object_inference_deprecated(): tm.assert_series_equal(res, exp) -def _vc_make_index(kind: str, periods=5, freq="D"): - if kind == "dt": - return pd.date_range("2016-01-01", periods=periods, freq=freq) - if kind == "td": - return pd.timedelta_range(Timedelta(0), periods=periods, freq=freq) - raise ValueError("kind must be 'dt' or 'td'") - - -@pytest.mark.parametrize( - "kind,freq,normalize", - [ - ("dt", "D", False), - ("dt", "D", True), - ("td", "D", False), - ("td", "D", True), - ("td", Timedelta(hours=1), False), - ("td", Timedelta(hours=1), True), - ], -) -def test_value_counts_freq_preserved_datetimelike_no_sort(kind, freq, normalize): - idx = _vc_make_index(kind, periods=5, freq=freq) - vc = idx.value_counts(sort=False, normalize=normalize) - assert vc.index.freq == idx.freq - if normalize: - assert np.isclose(vc.values, 1 / len(idx)).all() - - 
@pytest.mark.parametrize( - "kind,freq", + "index", [ - ("dt", "D"), - ("td", "D"), - ("td", Timedelta(hours=1)), + pd.date_range("2016-01-01", periods=5, freq="D"), + pd.timedelta_range(Timedelta(0), periods=5, freq="h"), ], + ids=["DatetimeIndex[D]", "TimedeltaIndex[h]"], ) -def test_value_counts_freq_drops_datetimelike_when_sorted(kind, freq): - idx = _vc_make_index(kind, periods=5, freq=freq) - vc = idx.value_counts() # default sort=True (reorders) - assert vc.index.freq is None - - @pytest.mark.parametrize( - "kind,freq", + "build,kwargs,exp_preserve,exp_hasnans,exp_index_fn", [ - ("dt", "D"), - ("td", "D"), - ("td", Timedelta(hours=1)), + (lambda idx: idx, {"sort": False}, True, False, lambda idx, obj: idx), + ( + lambda idx: idx, + {"sort": False, "normalize": True}, + True, + False, + lambda idx, obj: idx, + ), + (lambda idx: idx, {}, False, False, None), + ( + lambda idx: idx.insert(1, idx[1]), + {"sort": False}, + False, + False, + lambda idx, obj: type(idx)(idx, freq=None), + ), + ( + lambda idx: idx.delete(2), + {"sort": False}, + False, + False, + lambda idx, obj: type(idx)(obj, freq=None), + ), + ( + lambda idx: idx.insert(1, pd.NaT), + {"sort": False, "dropna": False}, + False, + True, + lambda idx, obj: type(idx)( + list(idx[:1]) + [pd.NaT] + list(idx[1:]), freq=None + ), + ), + ( + lambda idx: idx.insert(1, pd.NaT), + {"sort": False, "dropna": True}, + False, + False, + lambda idx, obj: type(idx)(idx, freq=None), + ), ], ) -def test_value_counts_freq_drops_datetimelike_with_duplicates(kind, freq): - base = _vc_make_index(kind, periods=5, freq=freq) - obj = base.insert(1, base[1]) # duplicate one label - vc = obj.value_counts(sort=False) - assert vc.index.freq is None - - -@pytest.mark.parametrize( - "kind,freq", - [ - ("dt", "D"), - ("td", "D"), - ("td", Timedelta(hours=1)), - ], -) -def test_value_counts_freq_drops_datetimelike_with_gap(kind, freq): - base = _vc_make_index(kind, periods=5, freq=freq) - obj = base.delete(2) # remove one step to 
break contiguity - vc = obj.value_counts(sort=False) - assert vc.index.freq is None +def test_value_counts_freq_datetimelike( + index, build, kwargs, exp_preserve, exp_hasnans, exp_index_fn +): + obj = build(index) + vc = obj.value_counts(**kwargs) + # without sort + if exp_index_fn is not None: + expected_idx = exp_index_fn(index, obj) + tm.assert_index_equal(vc.index, expected_idx) -@pytest.mark.parametrize( - "kind,freq,dropna,expect_hasnans", - [ - ("dt", "D", False, True), # keep NaT - ("dt", "D", True, False), # drop NaT - ("td", "D", False, True), - ("td", "D", True, False), - ("td", Timedelta(hours=1), False, True), - ("td", Timedelta(hours=1), True, False), - ], -) -def test_value_counts_freq_drops_datetimelike_with_nat( - kind, freq, dropna, expect_hasnans -): - base = _vc_make_index(kind, periods=3, freq=freq) - obj = base.insert(1, pd.NaT) - vc = obj.value_counts(dropna=dropna, sort=False) - assert vc.index.freq is None - assert vc.index.hasnans is expect_hasnans + # freq preservation / drop + if exp_preserve: + assert vc.index.freq == index.freq + else: + assert vc.index.freq is None + # NaT presence + assert vc.index.hasnans is exp_hasnans -@pytest.mark.parametrize( - "freq,start,periods,sort", - [ - ("D", "2016-01-01", 5, False), - ("D", "2016-01-01", 5, True), - ("M", "2016-01", 6, False), # MonthEnd - ("M", "2016-01", 6, True), - ("Q-DEC", "2016Q1", 4, False), # QuarterEnd (Dec anchored) - ("Q-DEC", "2016Q1", 4, True), - ("Y-DEC", "2014", 3, False), # YearEnd (Dec anchored) - ("Y-DEC", "2014", 3, True), - ], -) -def test_value_counts_period_freq_preserved_sort_and_nosort(freq, start, periods, sort): - pi = pd.period_range(start=start, periods=periods, freq=freq) - vc = pi.value_counts(sort=sort) - assert isinstance(vc.index, pd.PeriodIndex) - assert vc.index.dtype == pi.dtype - assert vc.index.freq == pi.freq - - -def test_value_counts_period_freq_preserved_with_duplicates(): - pi = pd.period_range("2016-01", periods=5, freq="M") - obj = 
pi.insert(1, pi[1]) # duplicate one label - vc = obj.value_counts(sort=False) - assert isinstance(vc.index, pd.PeriodIndex) - assert vc.index.dtype == pi.dtype - assert vc.index.freq == pi.freq - - -def test_value_counts_period_freq_preserved_with_gap(): - pi = pd.period_range("2016-01", periods=5, freq="M") - obj = pi.delete(2) # remove one element - vc = obj.value_counts(sort=False) - assert isinstance(vc.index, pd.PeriodIndex) - assert vc.index.dtype == pi.dtype - assert vc.index.freq == pi.freq - - -def test_value_counts_period_freq_preserved_with_normalize(): - pi = pd.period_range("2016-01", periods=4, freq="M") - vc = pi.value_counts(normalize=True, sort=False) - assert isinstance(vc.index, pd.PeriodIndex) - assert vc.index.dtype == pi.dtype - assert vc.index.freq == pi.freq - assert np.isclose(vc.values, 1 / len(pi)).all() - - -def test_value_counts_period_freq_preserved_with_nat_dropna_true(): - pi = pd.period_range("2016-01", periods=5, freq="M") - obj = pi.insert(1, pd.NaT) - vc = obj.value_counts(dropna=True, sort=False) - assert not vc.index.hasnans - assert isinstance(vc.index, pd.PeriodIndex) - assert vc.index.dtype == pi.dtype - assert vc.index.freq == pi.freq + # without normalize + if kwargs.get("normalize", False): + expected_val = 1.0 / len(index) + assert np.isclose(vc.to_numpy(), expected_val).all() From 7198d973d80dad85f6fd625d758a7ed62766f321 Mon Sep 17 00:00:00 2001 From: Sanjana Moudgalya <36978563+sanjanam1998@users.noreply.github.com> Date: Thu, 23 Oct 2025 12:32:04 -0400 Subject: [PATCH 6/9] Update pandas/core/algorithms.py Co-authored-by: William Ayd --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 886c3bf0deecd..bb4cf5de15d73 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -947,7 +947,7 @@ def value_counts_internal( ): # freq preservation # Rebuild idx with the correct type and inferred frequency - 
idx.freq = values.inferred_freq + idx.freq = values.inferred_freq # type: ignore[attr-defined] result = Series(counts, index=idx, name=name, copy=False) From 85630e8dfa2932e0ba67e792f6199ee6e49a6ab1 Mon Sep 17 00:00:00 2001 From: Sanjana Moudgalya <36978563+sanjanam1998@users.noreply.github.com> Date: Tue, 28 Oct 2025 16:41:12 +0000 Subject: [PATCH 7/9] test optimize --- pandas/core/algorithms.py | 6 +- pandas/tests/base/test_value_counts.py | 117 +++++++++++-------------- 2 files changed, 53 insertions(+), 70 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index bb4cf5de15d73..b977e998b82a4 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -941,12 +941,10 @@ def value_counts_internal( bins is None and not sort and isinstance(values, (DatetimeIndex, TimedeltaIndex)) - and values.inferred_freq is not None - and len(idx) == len(values) and idx.equals(values) + and values.inferred_freq is not None ): - # freq preservation - # Rebuild idx with the correct type and inferred frequency + # Preserve freq of original index idx.freq = values.inferred_freq # type: ignore[attr-defined] result = Series(counts, index=idx, name=name, copy=False) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 42e43990134bc..26f9919e95434 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -342,78 +342,63 @@ def test_value_counts_object_inference_deprecated(): @pytest.mark.parametrize( - "index", + ("index", "expected_index"), [ - pd.date_range("2016-01-01", periods=5, freq="D"), - pd.timedelta_range(Timedelta(0), periods=5, freq="h"), - ], - ids=["DatetimeIndex[D]", "TimedeltaIndex[h]"], -) -@pytest.mark.parametrize( - "build,kwargs,exp_preserve,exp_hasnans,exp_index_fn", - [ - (lambda idx: idx, {"sort": False}, True, False, lambda idx, obj: idx), - ( - lambda idx: idx, - {"sort": False, "normalize": True}, - True, - False, - lambda idx, obj: 
idx, + pytest.param( + pd.date_range("2016-01-01", periods=5, freq="D"), + pd.date_range("2016-01-01", periods=5, freq="D"), + ), + pytest.param( + pd.timedelta_range(Timedelta(0), periods=5, freq="h"), + pd.timedelta_range(Timedelta(0), periods=5, freq="h"), + ), + pytest.param( + pd.date_range("2016-01-01", periods=5, freq="D").insert( + 1, pd.date_range("2016-01-01", periods=5, freq="D")[1] + ), + DatetimeIndex(pd.date_range("2016-01-01", periods=5, freq="D"), freq=None), + ), + pytest.param( + pd.timedelta_range(Timedelta(0), periods=5, freq="h").insert( + 1, pd.timedelta_range(Timedelta(0), periods=5, freq="h")[1] + ), + TimedeltaIndex( + pd.timedelta_range(Timedelta(0), periods=5, freq="h"), freq=None + ), ), - (lambda idx: idx, {}, False, False, None), - ( - lambda idx: idx.insert(1, idx[1]), - {"sort": False}, - False, - False, - lambda idx, obj: type(idx)(idx, freq=None), + pytest.param( + pd.date_range("2016-01-01", periods=5, freq="D").delete(2), + DatetimeIndex( + pd.date_range("2016-01-01", periods=5, freq="D").delete(2), freq=None + ), ), - ( - lambda idx: idx.delete(2), - {"sort": False}, - False, - False, - lambda idx, obj: type(idx)(obj, freq=None), + pytest.param( + pd.timedelta_range(Timedelta(0), periods=5, freq="h").delete(2), + TimedeltaIndex( + pd.timedelta_range(Timedelta(0), periods=5, freq="h").delete(2), + freq=None, + ), ), - ( - lambda idx: idx.insert(1, pd.NaT), - {"sort": False, "dropna": False}, - False, - True, - lambda idx, obj: type(idx)( - list(idx[:1]) + [pd.NaT] + list(idx[1:]), freq=None + pytest.param( + pd.date_range("2016-01-01", periods=5, freq="D").insert(1, pd.NaT), + DatetimeIndex( + list(pd.date_range("2016-01-01", periods=5, freq="D")[:1]) + + [pd.NaT] + + list(pd.date_range("2016-01-01", periods=5, freq="D")[1:]), + freq=None, ), ), - ( - lambda idx: idx.insert(1, pd.NaT), - {"sort": False, "dropna": True}, - False, - False, - lambda idx, obj: type(idx)(idx, freq=None), + pytest.param( + 
pd.timedelta_range(Timedelta(0), periods=5, freq="h").insert(1, pd.NaT), + TimedeltaIndex( + list(pd.timedelta_range(Timedelta(0), periods=5, freq="h")[:1]) + + [pd.NaT] + + list(pd.timedelta_range(Timedelta(0), periods=5, freq="h")[1:]), + freq=None, + ), ), ], ) -def test_value_counts_freq_datetimelike( - index, build, kwargs, exp_preserve, exp_hasnans, exp_index_fn -): - obj = build(index) - vc = obj.value_counts(**kwargs) - - # without sort - if exp_index_fn is not None: - expected_idx = exp_index_fn(index, obj) - tm.assert_index_equal(vc.index, expected_idx) - - # freq preservation / drop - if exp_preserve: - assert vc.index.freq == index.freq - else: - assert vc.index.freq is None - - # NaT presence - assert vc.index.hasnans is exp_hasnans - - # without normalize - if kwargs.get("normalize", False): - expected_val = 1.0 / len(index) - assert np.isclose(vc.to_numpy(), expected_val).all() +def test_value_counts_index_datetimelike(index, expected_index): + vc = index.value_counts(sort=False, dropna=False) + tm.assert_index_equal(vc.index, expected_index) From ffad5038fd015daf70123fe6595b3be858a3d0d1 Mon Sep 17 00:00:00 2001 From: Sanjana Moudgalya <36978563+sanjanam1998@users.noreply.github.com> Date: Wed, 29 Oct 2025 13:52:13 +0000 Subject: [PATCH 8/9] none typing --- pandas/tests/base/test_value_counts.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 26f9919e95434..672b2b744bbe0 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -356,27 +356,22 @@ def test_value_counts_object_inference_deprecated(): pd.date_range("2016-01-01", periods=5, freq="D").insert( 1, pd.date_range("2016-01-01", periods=5, freq="D")[1] ), - DatetimeIndex(pd.date_range("2016-01-01", periods=5, freq="D"), freq=None), + DatetimeIndex(pd.date_range("2016-01-01", periods=5, freq="D")), ), pytest.param( 
pd.timedelta_range(Timedelta(0), periods=5, freq="h").insert( 1, pd.timedelta_range(Timedelta(0), periods=5, freq="h")[1] ), - TimedeltaIndex( - pd.timedelta_range(Timedelta(0), periods=5, freq="h"), freq=None - ), + TimedeltaIndex(pd.timedelta_range(Timedelta(0), periods=5, freq="h")), ), pytest.param( pd.date_range("2016-01-01", periods=5, freq="D").delete(2), - DatetimeIndex( - pd.date_range("2016-01-01", periods=5, freq="D").delete(2), freq=None - ), + DatetimeIndex(pd.date_range("2016-01-01", periods=5, freq="D").delete(2)), ), pytest.param( pd.timedelta_range(Timedelta(0), periods=5, freq="h").delete(2), TimedeltaIndex( - pd.timedelta_range(Timedelta(0), periods=5, freq="h").delete(2), - freq=None, + pd.timedelta_range(Timedelta(0), periods=5, freq="h").delete(2) ), ), pytest.param( @@ -384,8 +379,7 @@ def test_value_counts_object_inference_deprecated(): DatetimeIndex( list(pd.date_range("2016-01-01", periods=5, freq="D")[:1]) + [pd.NaT] - + list(pd.date_range("2016-01-01", periods=5, freq="D")[1:]), - freq=None, + + list(pd.date_range("2016-01-01", periods=5, freq="D")[1:]) ), ), pytest.param( @@ -393,8 +387,7 @@ def test_value_counts_object_inference_deprecated(): TimedeltaIndex( list(pd.timedelta_range(Timedelta(0), periods=5, freq="h")[:1]) + [pd.NaT] - + list(pd.timedelta_range(Timedelta(0), periods=5, freq="h")[1:]), - freq=None, + + list(pd.timedelta_range(Timedelta(0), periods=5, freq="h")[1:]) ), ), ], From 7b573c2723f29f9c651c20cd82e017017b660459 Mon Sep 17 00:00:00 2001 From: Sanjana Moudgalya <36978563+sanjanam1998@users.noreply.github.com> Date: Thu, 30 Oct 2025 14:50:59 +0000 Subject: [PATCH 9/9] improving test cases --- pandas/tests/base/test_value_counts.py | 83 ++++++++++++++++---------- 1 file changed, 53 insertions(+), 30 deletions(-) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 672b2b744bbe0..f642d26c32f5d 100644 --- a/pandas/tests/base/test_value_counts.py +++ 
b/pandas/tests/base/test_value_counts.py @@ -14,6 +14,7 @@ Series, Timedelta, TimedeltaIndex, + Timestamp, array, ) import pandas._testing as tm @@ -344,52 +345,74 @@ def test_value_counts_object_inference_deprecated(): @pytest.mark.parametrize( ("index", "expected_index"), [ - pytest.param( + [ pd.date_range("2016-01-01", periods=5, freq="D"), pd.date_range("2016-01-01", periods=5, freq="D"), - ), - pytest.param( + ], + [ pd.timedelta_range(Timedelta(0), periods=5, freq="h"), pd.timedelta_range(Timedelta(0), periods=5, freq="h"), - ), - pytest.param( - pd.date_range("2016-01-01", periods=5, freq="D").insert( - 1, pd.date_range("2016-01-01", periods=5, freq="D")[1] + ], + [ + DatetimeIndex( + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1)] + + [Timestamp("2016-01-02")] + + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1, 5)] ), DatetimeIndex(pd.date_range("2016-01-01", periods=5, freq="D")), - ), - pytest.param( - pd.timedelta_range(Timedelta(0), periods=5, freq="h").insert( - 1, pd.timedelta_range(Timedelta(0), periods=5, freq="h")[1] + ], + [ + TimedeltaIndex( + [Timedelta(hours=i) for i in range(1)] + + [Timedelta(hours=1)] + + [Timedelta(hours=i) for i in range(1, 5)], ), TimedeltaIndex(pd.timedelta_range(Timedelta(0), periods=5, freq="h")), - ), - pytest.param( - pd.date_range("2016-01-01", periods=5, freq="D").delete(2), - DatetimeIndex(pd.date_range("2016-01-01", periods=5, freq="D").delete(2)), - ), - pytest.param( - pd.timedelta_range(Timedelta(0), periods=5, freq="h").delete(2), + ], + [ + DatetimeIndex( + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(2)] + + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(3, 5)], + ), + DatetimeIndex( + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(2)] + + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(3, 5)], + ), + ], + [ + TimedeltaIndex( + [Timedelta(hours=i) for i in range(2)] + + [Timedelta(hours=i) for i in range(3, 5)], + ), 
TimedeltaIndex( - pd.timedelta_range(Timedelta(0), periods=5, freq="h").delete(2) + [Timedelta(hours=i) for i in range(2)] + + [Timedelta(hours=i) for i in range(3, 5)], ), - ), - pytest.param( - pd.date_range("2016-01-01", periods=5, freq="D").insert(1, pd.NaT), + ], + [ DatetimeIndex( - list(pd.date_range("2016-01-01", periods=5, freq="D")[:1]) + [Timestamp("2016-01-01")] + + [pd.NaT] + + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1, 5)], + ), + DatetimeIndex( + [Timestamp("2016-01-01")] + + [pd.NaT] + + [Timestamp("2016-01-01") + Timedelta(days=i) for i in range(1, 5)], + ), + ], + [ + TimedeltaIndex( + [Timedelta(hours=0)] + [pd.NaT] - + list(pd.date_range("2016-01-01", periods=5, freq="D")[1:]) + + [Timedelta(hours=i) for i in range(1, 5)], ), - ), - pytest.param( - pd.timedelta_range(Timedelta(0), periods=5, freq="h").insert(1, pd.NaT), TimedeltaIndex( - list(pd.timedelta_range(Timedelta(0), periods=5, freq="h")[:1]) + [Timedelta(hours=0)] + [pd.NaT] - + list(pd.timedelta_range(Timedelta(0), periods=5, freq="h")[1:]) + + [Timedelta(hours=i) for i in range(1, 5)], ), - ), + ], ], ) def test_value_counts_index_datetimelike(index, expected_index):