pandas-dev
diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
Lines changed: 5 additions & 5 deletions
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
Lines changed: 1 addition & 46 deletions
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
Lines changed: 7 additions & 0 deletions
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/conftest.py b/pandas/conftest.py
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
Lines changed: 2 additions & 5 deletions
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
Lines changed: 12 additions & 1 deletion
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
Lines changed: 1 addition & 2 deletions
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
Lines changed: 67 additions & 34 deletions
@@ -93,7 +93,7 @@ jobs:
       run: mv doc/build/html web/build/docs
 
     - name: Save website as an artifact
-      uses: actions/upload-artifact@v4
+      uses: actions/upload-artifact@v5
       with:
         name: website
         path: web/build
 
@@ -64,7 +64,7 @@ jobs:
           python -m pip install build
           python -m build --sdist
 
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v5
         with:
           name: sdist
           path: ./dist/*
@@ -138,7 +138,7 @@ jobs:
       # removes unnecessary files from the release
       - name: Download sdist (not macOS)
         #if: ${{ matrix.buildplat[1] != 'macosx_*' }}
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           name: sdist
           path: ./dist
@@ -196,7 +196,7 @@ jobs:
         shell: bash -el {0}
         run: for whl in $(ls wheelhouse); do wheel unpack wheelhouse/$whl -d /tmp; done
 
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v5
         with:
           name: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
           path: ./wheelhouse/*.whl
@@ -238,11 +238,11 @@ jobs:
 
     steps:
       - name: Download all artefacts
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v6
         with:
           path: dist          # everything lands in ./dist/**
 
-      # TODO: This step can be probably be achieved by actions/download-artifact@v5
+      # TODO: This step can probably be achieved by actions/download-artifact@v6
       # by specifying merge-multiple: true, and a glob pattern
       - name: Collect files
         run: |
 
@@ -2366,52 +2366,7 @@ Read a URL with no options:
 
    The data from the above URL changes every Monday so the resulting data above may be slightly different.
 
-Read a URL while passing headers alongside the HTTP request:
-
-.. code-block:: ipython
-
-   In [322]: url = 'https://www.sump.org/notes/request/' # HTTP request reflector
-
-   In [323]: pd.read_html(url)
-   Out[323]:
-   [                   0                    1
-    0     Remote Socket:  51.15.105.256:51760
-    1  Protocol Version:             HTTP/1.1
-    2    Request Method:                  GET
-    3       Request URI:      /notes/request/
-    4     Request Query:                  NaN,
-    0   Accept-Encoding:             identity
-    1              Host:         www.sump.org
-    2        User-Agent:    Python-urllib/3.8
-    3        Connection:                close]
-
-   In [324]: headers = {
-      .....:    'User-Agent':'Mozilla Firefox v14.0',
-      .....:    'Accept':'application/json',
-      .....:    'Connection':'keep-alive',
-      .....:    'Auth':'Bearer 2*/f3+fe68df*4'
-      .....: }
-
-   In [325]: pd.read_html(url, storage_options=headers)
-   Out[325]:
-   [                   0                    1
-    0     Remote Socket:  51.15.105.256:51760
-    1  Protocol Version:             HTTP/1.1
-    2    Request Method:                  GET
-    3       Request URI:      /notes/request/
-    4     Request Query:                  NaN,
-    0        User-Agent: Mozilla Firefox v14.0
-    1    AcceptEncoding:   gzip,  deflate,  br
-    2            Accept:      application/json
-    3        Connection:             keep-alive
-    4              Auth:  Bearer 2*/f3+fe68df*4]
-
-.. note::
-
-   We see above that the headers we passed are reflected in the HTTP request.
-
-Read in the content of the file from the above URL and pass it to ``read_html``
-as a string:
+Read in HTML content from a file using ``read_html``:
 
 .. ipython:: python
 
 
@@ -219,6 +219,7 @@ Other enhancements
 - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
 - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
 - Implemented :meth:`Series.str.isascii` and :meth:`Index.str.isascii` (:issue:`59091`)
+- Improve error reporting through outputting the first few duplicates when :func:`merge` validation fails (:issue:`62742`)
 - Improve the resulting dtypes in :meth:`DataFrame.where` and :meth:`DataFrame.mask` with :class:`ExtensionDtype` ``other`` (:issue:`62038`)
 - Improved deprecation message for offset aliases (:issue:`60820`)
 - Many type aliases are now exposed in the new submodule :py:mod:`pandas.api.typing.aliases` (:issue:`55231`)
@@ -956,6 +957,7 @@ Bug fixes
 
 Categorical
 ^^^^^^^^^^^
+- Bug in :class:`Categorical` where constructing from a pandas :class:`Series` or :class:`Index` with ``dtype='object'`` did not preserve the categories' dtype as ``object``; now the ``categories.dtype`` is preserved as ``object`` for these cases, while numpy arrays and Python sequences with ``dtype='object'`` continue to infer the most specific dtype (for example, ``str`` if all elements are strings) (:issue:`61778`)
 - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
 - Bug in :func:`testing.assert_index_equal` raising ``TypeError`` instead of ``AssertionError`` for incomparable ``CategoricalIndex`` when ``check_categorical=True`` and ``exact=False`` (:issue:`61935`)
 - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
@@ -1015,6 +1017,7 @@ Numeric
 ^^^^^^^
 - Bug in :func:`api.types.infer_dtype` returning "mixed" for complex and ``pd.NA`` mix (:issue:`61976`)
 - Bug in :func:`api.types.infer_dtype` returning "mixed-integer-float" for float and ``pd.NA`` mix (:issue:`61621`)
+- Bug in :meth:`DataFrame.combine_first` where Int64 and UInt64 integers with absolute value greater than ``2**53`` would lose precision after the operation. (:issue:`60128`)
 - Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`)
 - Bug in :meth:`DataFrame.cov` raises a ``TypeError`` instead of returning potentially incorrect results or other errors (:issue:`53115`)
 - Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`)
@@ -1034,6 +1037,7 @@ Conversion
 
 Strings
 ^^^^^^^
+- Bug in :meth:`Series.str.replace` raising an error on valid group references (``\1``, ``\2``, etc.) on series converted to PyArrow backend dtype (:issue:`62653`)
 - Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`)
 - Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
 - Bug in multiplication with a :class:`StringDtype` incorrectly allowing multiplying by bools; explicitly cast to integers instead (:issue:`62595`)
@@ -1043,6 +1047,7 @@ Interval
 - :meth:`Index.is_monotonic_decreasing`, :meth:`Index.is_monotonic_increasing`, and :meth:`Index.is_unique` could incorrectly be ``False`` for an ``Index`` created from a slice of another ``Index``. (:issue:`57911`)
 - Bug in :class:`Index`, :class:`Series`, :class:`DataFrame` constructors when given a sequence of :class:`Interval` subclass objects casting them to :class:`Interval` (:issue:`46945`)
 - Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`)
+- Bug in :meth:`IntervalIndex.get_indexer` and :meth:`IntervalIndex.drop` when one of the sides of the index is non-unique (:issue:`52245`)
 
 Indexing
 ^^^^^^^^
@@ -1149,6 +1154,7 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`)
 - Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`)
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
+- Bug in :class:`.DataFrameGroupBy` reductions where non-Boolean values were allowed for the ``numeric_only`` argument; passing a non-Boolean value will now raise (:issue:`62778`)
 - Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
 - Bug in :meth:`.Series.rolling` when used with a :class:`.BaseIndexer` subclass and computing min/max (:issue:`46726`)
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
@@ -1205,6 +1211,7 @@ ExtensionArray
 - Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`)
 - Bug in constructing pandas data structures when passing into ``dtype`` a string of the type followed by ``[pyarrow]`` while PyArrow is not installed would raise ``NameError`` rather than ``ImportError`` (:issue:`57928`)
 - Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`)
+- Fixed flex arithmetic with :class:`ExtensionArray` operands raising when ``fill_value`` was passed. (:issue:`62467`)
 
 Styler
 ^^^^^^
 
@@ -321,6 +321,9 @@ cdef class IndexEngine:
             if is_strict_monotonic:
                 self.unique = 1
                 self.need_unique_check = 0
+            elif self.monotonic_inc == 1 or self.monotonic_dec == 1:
+                self.unique = 0
+                self.need_unique_check = 0
 
     cdef _call_monotonic(self, values):
         return algos.is_monotonic(values, timelike=False)
 
@@ -1447,6 +1447,9 @@ def any_string_dtype(request):
         return pd.StringDtype(storage, na_value)
 
 
+any_string_dtype2 = any_string_dtype
+
+
 @pytest.fixture(params=tm.DATETIME64_DTYPES)
 def datetime64_dtype(request):
     """
 
@@ -173,15 +173,12 @@ def _str_replace(
             or callable(repl)
             or not case
             or flags
-            or (
-                isinstance(repl, str)
-                and (r"\g<" in repl or re.search(r"\\\d", repl) is not None)
-            )
+            or (isinstance(repl, str) and r"\g<" in repl)
         ):
             raise NotImplementedError(
                 "replace is not supported with a re.Pattern, callable repl, "
                 "case=False, flags!=0, or when the replacement string contains "
-                "named group references (\\g<...>, \\d+)"
+                "named group references (\\g<...>)"
             )
 
         func = pc.replace_substring_regex if regex else pc.replace_substring
 
@@ -460,6 +460,10 @@ def __init__(
                 codes = arr.indices.to_numpy()
                 dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered)
             else:
+                preserve_object = False
+                if isinstance(values, (ABCIndex, ABCSeries)) and values.dtype == object:
+                    # GH#61778
+                    preserve_object = True
                 if not isinstance(values, ABCIndex):
                     # in particular RangeIndex xref test_index_equal_range_categories
                     values = sanitize_array(values, None)
@@ -476,7 +480,14 @@ def __init__(
                             "by passing in a categories argument."
                         ) from err
 
-                # we're inferring from values
+                if preserve_object:
+                    # GH#61778 wrap categories in an Index to prevent dtype
+                    #  inference in the CategoricalDtype constructor
+                    from pandas import Index
+
+                    categories = Index(categories, dtype=object, copy=False)
+
+                # if not preserve_object, we're inferring from values
                 dtype = CategoricalDtype(categories, dtype.ordered)
 
         elif isinstance(values.dtype, CategoricalDtype):
 
@@ -425,8 +425,7 @@ def _str_replace(
             or flags
             or (  # substitution contains a named group pattern
                 # https://docs.python.org/3/library/re.html
-                isinstance(repl, str)
-                and (r"\g<" in repl or re.search(r"\\\d", repl) is not None)
+                isinstance(repl, str) and r"\g<" in repl
             )
         ):
             return super()._str_replace(pat, repl, n, case, flags, regex)
 
@@ -3293,28 +3293,71 @@ def to_html(
         Examples
         --------
         >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]})
-        >>> html_string = '''<table border="1" class="dataframe">
-        ...   <thead>
-        ...     <tr style="text-align: right;">
-        ...       <th></th>
-        ...       <th>col1</th>
-        ...       <th>col2</th>
-        ...     </tr>
-        ...   </thead>
-        ...   <tbody>
-        ...     <tr>
-        ...       <th>0</th>
-        ...       <td>1</td>
-        ...       <td>4</td>
-        ...     </tr>
-        ...     <tr>
-        ...       <th>1</th>
-        ...       <td>2</td>
-        ...       <td>3</td>
-        ...     </tr>
-        ...   </tbody>
-        ... </table>'''
-        >>> assert html_string == df.to_html()
+        >>> html_string = df.to_html()
+        >>> print(html_string)
+        <table border="1" class="dataframe">
+          <thead>
+            <tr style="text-align: right;">
+              <th></th>
+              <th>col1</th>
+              <th>col2</th>
+            </tr>
+          </thead>
+          <tbody>
+            <tr>
+              <th>0</th>
+              <td>1</td>
+              <td>4</td>
+            </tr>
+            <tr>
+              <th>1</th>
+              <td>2</td>
+              <td>3</td>
+            </tr>
+          </tbody>
+        </table>
+
+        HTML output
+
+        +----+-----+-----+
+        |    |col1 |col2 |
+        +====+=====+=====+
+        |0   |1    |4    |
+        +----+-----+-----+
+        |1   |2    |3    |
+        +----+-----+-----+
+
+        >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]})
+        >>> html_string = df.to_html(index=False)
+        >>> print(html_string)
+        <table border="1" class="dataframe">
+          <thead>
+            <tr style="text-align: right;">
+              <th>col1</th>
+              <th>col2</th>
+            </tr>
+          </thead>
+          <tbody>
+            <tr>
+              <td>1</td>
+              <td>4</td>
+            </tr>
+            <tr>
+              <td>2</td>
+              <td>3</td>
+            </tr>
+          </tbody>
+        </table>
+
+        HTML output
+
+        +-----+-----+
+        |col1 |col2 |
+        +=====+=====+
+        |1    |4    |
+        +-----+-----+
+        |2    |3    |
+        +-----+-----+
         """
         if justify is not None and justify not in fmt.VALID_JUSTIFY_PARAMETERS:
             raise ValueError("Invalid value for justify parameter")
@@ -9165,20 +9208,10 @@ def combine_first(self, other: DataFrame) -> DataFrame:
         1  0.0  3.0  1.0
         2  NaN  3.0  1.0
         """
-        from pandas.core.computation import expressions
 
         def combiner(x: Series, y: Series):
-            mask = x.isna()._values
-
-            x_values = x._values
-            y_values = y._values
-
-            # If the column y in other DataFrame is not in first DataFrame,
-            # just return y_values.
-            if y.name not in self.columns:
-                return y_values
-
-            return expressions.where(mask, y_values, x_values)
+            # GH#60128 The combiner is supposed to preserve EA Dtypes.
+            return y if y.name not in self.columns else y.where(x.isna(), x)
 
         if len(other) == 0:
             combined = self.reindex(