
Commit 56ddf75

Merge branch 'main' into warn-52593

2 parents 4c1a545 + 4075fea
10 files changed: +115 −38 lines

README.md

Lines changed: 1 addition & 1 deletion

@@ -179,7 +179,7 @@ If you are simply looking to start working with the pandas codebase, navigate to

 You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).

-Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it!
+Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’... you can do something about it!

 Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Slack](https://pandas.pydata.org/docs/dev/development/community.html?highlight=slack#community-slack).

doc/source/user_guide/groupby.rst

Lines changed: 1 addition & 1 deletion

@@ -137,7 +137,7 @@ We could naturally group by either the ``A`` or ``B`` columns, or both:

 ``df.groupby('A')`` is just syntactic sugar for ``df.groupby(df['A'])``.

-The above GroupBy will split the DataFrame on its index (rows). To split by columns, first do
+DataFrame groupby always operates along axis 0 (rows). To split by columns, first do
 a transpose:

 .. ipython::
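
The reworded sentence still points readers to the transpose idiom the guide goes on to demonstrate; a minimal sketch of that idiom (the frame and grouping key below are illustrative, not taken from the guide):

```python
import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["x", "y", "z"])

# groupby splits along the rows (axis 0), so to group *columns* we transpose,
# group the transposed frame's rows, then transpose the result back.
by_first_letter = df.T.groupby(lambda col: col[0].lower()).sum().T
print(by_first_letter)
```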

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions

@@ -961,6 +961,7 @@ Categorical
 ^^^^^^^^^^^
 - Bug in :class:`Categorical` where constructing from a pandas :class:`Series` or :class:`Index` with ``dtype='object'`` did not preserve the categories' dtype as ``object``; now the ``categories.dtype`` is preserved as ``object`` for these cases, while numpy arrays and Python sequences with ``dtype='object'`` continue to infer the most specific dtype (for example, ``str`` if all elements are strings) (:issue:`61778`)
 - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
+- Bug in :func:`bdate_range` raising ``ValueError`` with frequency ``freq="cbh"`` (:issue:`62849`)
 - Bug in :func:`testing.assert_index_equal` raising ``TypeError`` instead of ``AssertionError`` for incomparable ``CategoricalIndex`` when ``check_categorical=True`` and ``exact=False`` (:issue:`61935`)
 - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
 - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)

@@ -983,6 +984,7 @@ Datetimelike
 - Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` casting ``datetime64`` and ``timedelta64`` columns to ``float64`` and losing precision (:issue:`60850`)
 - Bug in :meth:`DataFrame.agg` with a DataFrame with missing values resulting in an ``IndexError`` (:issue:`58810`)
 - Bug in :meth:`DateOffset.rollback` (and subclass methods) with ``normalize=True`` rolling back one offset too long (:issue:`32616`)
+- Bug in :meth:`DatetimeIndex.asof` with a string key giving incorrect results (:issue:`50946`)
 - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` not raising on custom business day frequencies bigger than "1C" (:issue:`58664`)
 - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
 - Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`)

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion

@@ -9430,7 +9430,7 @@ def groupby(
             index. If a dict or Series is passed, the Series or dict VALUES
             will be used to determine the groups (the Series' values are first
             aligned; see ``.align()`` method). If a list or ndarray of length
-            equal to the selected axis is passed (see the `groupby user guide
+            equal to the number of rows is passed (see the `groupby user guide
             <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
             the values are used as-is to determine the groups. A label or list
             of labels may be passed to group by the columns in ``self``.
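
The reworded docstring line covers grouping by an array whose length matches the number of rows; a small illustration of that case (the data is made up for the example):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"value": [10, 20, 30, 40]})

# An ndarray (or list) with one entry per row is used as-is as the group labels.
keys = np.array(["a", "b", "a", "b"])
print(df.groupby(keys).sum())
#    value
# a     40
# b     60
```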

pandas/core/indexes/base.py

Lines changed: 1 addition & 1 deletion

@@ -5675,7 +5675,7 @@ def asof(self, label):
                 return self._na_value
         else:
             if isinstance(loc, slice):
-                loc = loc.indices(len(self))[-1]
+                return self[loc][-1]

         return self[loc]
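
The touched branch handles the case where ``get_loc`` returns a slice rather than a single position. A small check mirroring the new ``test_asof_datetime_string`` test further down in this commit (values come from that test; the assertions reflect the fixed behaviour):

```python
import pandas as pd

dti = pd.date_range("2021-08-05", "2021-08-10", freq="1D")
assert dti.asof("2021-08-09") == pd.Timestamp("2021-08-09")

# Mixing in a non-midnight timestamp can make the string key resolve to a
# slice; this path previously returned an incorrect value (GH 50946).
dti2 = pd.DatetimeIndex(list(dti) + ["2021-08-11 00:00:01"])
assert dti2.asof("2021-08-09") == pd.Timestamp("2021-08-09")
```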

pandas/core/indexes/datetimes.py

Lines changed: 4 additions & 2 deletions

@@ -1133,12 +1133,14 @@ def bdate_range(
         msg = "freq must be specified for bdate_range; use date_range instead"
         raise TypeError(msg)

-    if isinstance(freq, str) and freq.startswith("C"):
+    if isinstance(freq, str) and freq.upper().startswith("C"):
+        msg = f"invalid custom frequency string: {freq}"
+        if freq == "CBH":
+            raise ValueError(f"{msg}, did you mean cbh?")
         try:
             weekmask = weekmask or "Mon Tue Wed Thu Fri"
             freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask)
         except (KeyError, TypeError) as err:
-            msg = f"invalid custom frequency string: {freq}"
             raise ValueError(msg) from err
     elif holidays or weekmask:
         msg = (
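
The added guard accepts the lowercase custom-business-hour alias and turns the old uppercase spelling into a pointed error. A quick sketch of both paths, using the same arguments as the new tests in this commit:

```python
import pandas as pd

# Lowercase "cbh" (CustomBusinessHour) now works with bdate_range.
idx = pd.bdate_range(
    "2009-03-13",
    "2009-03-15",
    freq="cbh",
    weekmask="Mon Wed Fri",
    holidays=["2009-03-14"],
)
print(idx)  # hourly stamps from 09:00 through 16:00 on 2009-03-13

# Uppercase "CBH" is rejected with a hint pointing at the lowercase alias.
try:
    pd.bdate_range("2009-03-13", "2009-03-15", freq="CBH",
                   weekmask="Mon Wed Fri", holidays=["2009-03-14"])
except ValueError as err:
    print(err)  # invalid custom frequency string: CBH, did you mean cbh?
```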

pandas/tests/frame/methods/test_join.py

Lines changed: 24 additions & 0 deletions

@@ -575,3 +575,27 @@ def test_frame_join_tzaware(self):
         tm.assert_index_equal(result.index, expected)
         assert result.index.tz.key == "US/Central"
+
+    def test_frame_join_categorical_index(self):
+        # GH 61675
+        cat_data = pd.Categorical(
+            [3, 4],
+            categories=pd.Series([2, 3, 4, 5], dtype="Int64"),
+            ordered=True,
+        )
+        values1 = "a b".split()
+        values2 = "foo bar".split()
+        df1 = DataFrame({"hr": cat_data, "values1": values1}).set_index("hr")
+        df2 = DataFrame({"hr": cat_data, "values2": values2}).set_index("hr")
+        df1.columns = pd.CategoricalIndex([4], dtype=cat_data.dtype, name="other_hr")
+        df2.columns = pd.CategoricalIndex([3], dtype=cat_data.dtype, name="other_hr")
+
+        df_joined = df1.join(df2)
+        expected = DataFrame(
+            {"hr": cat_data, "values1": values1, "values2": values2}
+        ).set_index("hr")
+        expected.columns = pd.CategoricalIndex(
+            [4, 3], dtype=cat_data.dtype, name="other_hr"
+        )
+
+        tm.assert_frame_equal(df_joined, expected)

pandas/tests/indexes/datetimes/methods/test_asof.py

Lines changed: 16 additions & 0 deletions

@@ -1,6 +1,7 @@
 from datetime import timedelta

 from pandas import (
+    DatetimeIndex,
     Index,
     Timestamp,
     date_range,

@@ -28,3 +29,18 @@ def test_asof(self):

         dt = index[0].to_pydatetime()
         assert isinstance(index.asof(dt), Timestamp)
+
+    def test_asof_datetime_string(self):
+        # GH#50946
+
+        dti = date_range("2021-08-05", "2021-08-10", freq="1D")
+
+        key = "2021-08-09"
+        res = dti.asof(key)
+        exp = dti[4]
+        assert res == exp
+
+        # adding a non-midnight time caused a bug
+        dti2 = DatetimeIndex(list(dti) + ["2021-08-11 00:00:01"])
+        res = dti2.asof(key)
+        assert res == exp

pandas/tests/indexes/datetimes/test_date_range.py

Lines changed: 34 additions & 1 deletion

@@ -1216,7 +1216,7 @@ def test_cdaterange_holidays_weekmask_requires_freqstr(self):
         )

     @pytest.mark.parametrize(
-        "freq", [freq for freq in prefix_mapping if freq.startswith("C")]
+        "freq", [freq for freq in prefix_mapping if freq.upper().startswith("C")]
     )
     def test_all_custom_freq(self, freq):
         # should not raise

@@ -1280,6 +1280,39 @@ def test_data_range_custombusinessday_partial_time(self, unit):
         )
         tm.assert_index_equal(result, expected)

+    def test_cdaterange_cbh(self):
+        # GH#62849
+        result = bdate_range(
+            "2009-03-13",
+            "2009-03-15",
+            freq="cbh",
+            weekmask="Mon Wed Fri",
+            holidays=["2009-03-14"],
+        )
+        expected = DatetimeIndex(
+            [
+                "2009-03-13 09:00:00",
+                "2009-03-13 10:00:00",
+                "2009-03-13 11:00:00",
+                "2009-03-13 12:00:00",
+                "2009-03-13 13:00:00",
+                "2009-03-13 14:00:00",
+                "2009-03-13 15:00:00",
+                "2009-03-13 16:00:00",
+            ],
+            dtype="datetime64[ns]",
+            freq="cbh",
+        )
+        tm.assert_index_equal(result, expected)
+
+    def test_cdaterange_deprecated_error_CBH(self):
+        # GH#62849
+        msg = "invalid custom frequency string: CBH, did you mean cbh?"
+        with pytest.raises(ValueError, match=msg):
+            bdate_range(
+                START, END, freq="CBH", weekmask="Mon Wed Fri", holidays=["2009-03-14"]
+            )
+

 class TestDateRangeNonNano:
     def test_date_range_reso_validation(self):

pandas/tests/io/parser/common/test_file_buffer_url.py

Lines changed: 31 additions & 31 deletions

@@ -97,25 +97,25 @@ def test_nonexistent_path(all_parsers):

 @pytest.mark.skipif(WASM, reason="limited file system access on WASM")
 @td.skip_if_windows  # os.chmod does not work in windows
-def test_no_permission(all_parsers):
+def test_no_permission(all_parsers, temp_file):
     # GH 23784
     parser = all_parsers

     msg = r"\[Errno 13\]"
-    with tm.ensure_clean() as path:
-        os.chmod(path, 0)  # make file unreadable
-
-        # verify that this process cannot open the file (not running as sudo)
-        try:
-            with open(path, encoding="utf-8"):
-                pass
-            pytest.skip("Running as sudo.")
-        except PermissionError:
-            pass
-
-        with pytest.raises(PermissionError, match=msg) as e:
-            parser.read_csv(path)
-        assert path == e.value.filename
+    path = temp_file
+    os.chmod(path, 0)  # make file unreadable
+
+    # verify that this process cannot open the file (not running as sudo)
+    try:
+        with open(path, encoding="utf-8"):
+            pass
+        pytest.skip("Running as sudo.")
+    except PermissionError:
+        pass
+
+    with pytest.raises(PermissionError, match=msg) as e:
+        parser.read_csv(path)
+    assert str(path.resolve()) == e.value.filename


@@ -269,19 +269,19 @@ def test_internal_eof_byte(all_parsers):
     tm.assert_frame_equal(result, expected)


-def test_internal_eof_byte_to_file(all_parsers):
+def test_internal_eof_byte_to_file(all_parsers, temp_file):
     # see gh-16559
     parser = all_parsers
     data = b'c1,c2\r\n"test \x1a test", test\r\n'
     expected = DataFrame([["test \x1a test", " test"]], columns=["c1", "c2"])
     path = f"__{uuid.uuid4()}__.csv"

-    with tm.ensure_clean(path) as path:
-        with open(path, "wb") as f:
-            f.write(data)
-
-        result = parser.read_csv(path)
-        tm.assert_frame_equal(result, expected)
+    path2 = temp_file.parent / path
+    with open(path2, "wb") as f:
+        f.write(data)
+
+    result = parser.read_csv(path2)
+    tm.assert_frame_equal(result, expected)


@@ -372,7 +372,7 @@ def test_read_csv_file_handle(all_parsers, io_class, encoding):
     assert not handle.closed


-def test_memory_map_compression(all_parsers, compression):
+def test_memory_map_compression(all_parsers, compression, temp_file):
     """
     Support memory map for compressed files.

@@ -381,16 +381,16 @@ def test_memory_map_compression(all_parsers, compression):
     parser = all_parsers
     expected = DataFrame({"a": [1], "b": [2]})

-    with tm.ensure_clean() as path:
-        expected.to_csv(path, index=False, compression=compression)
-
-        if parser.engine == "pyarrow":
-            msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
-            with pytest.raises(ValueError, match=msg):
-                parser.read_csv(path, memory_map=True, compression=compression)
-            return
-
-        result = parser.read_csv(path, memory_map=True, compression=compression)
+    path = temp_file
+    expected.to_csv(path, index=False, compression=compression)
+
+    if parser.engine == "pyarrow":
+        msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(path, memory_map=True, compression=compression)
+        return
+
+    result = parser.read_csv(path, memory_map=True, compression=compression)

     tm.assert_frame_equal(
         result,

@@ -442,12 +442,12 @@ def test_context_manageri_user_provided(all_parsers, datapath):


 @skip_pyarrow  # ParserError: Empty CSV file
-def test_file_descriptor_leak(all_parsers):
+def test_file_descriptor_leak(all_parsers, temp_file):
     # GH 31488
     parser = all_parsers
-    with tm.ensure_clean() as path:
-        with pytest.raises(EmptyDataError, match="No columns to parse from file"):
-            parser.read_csv(path)
+    path = temp_file
+    with pytest.raises(EmptyDataError, match="No columns to parse from file"):
+        parser.read_csv(path)


 def test_memory_map(all_parsers, csv_dir_path):
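
The recurring change in this file swaps the ``tm.ensure_clean()`` context manager for a ``temp_file`` pytest fixture and un-indents the test bodies accordingly. The fixture itself is not part of this diff; a hypothetical sketch of a ``tmp_path``-backed fixture with the shape these tests assume (an already-created, empty ``pathlib.Path``):

```python
import uuid

import pytest


@pytest.fixture
def temp_file(tmp_path):
    # Hypothetical: a unique, already-created file in pytest's per-test
    # temporary directory; pytest removes tmp_path afterwards, so tests no
    # longer need an explicit cleanup context manager.
    path = tmp_path / f"{uuid.uuid4()}.csv"
    path.touch()
    return path
```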
