pandas-dev
diff --git a/‎doc/source/whatsnew/v3.0.0.rst‎
Lines changed: 2 additions & 0 deletions b/‎doc/source/whatsnew/v3.0.0.rst‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/core/dtypes/concat.py‎
Lines changed: 4 additions & 0 deletions b/‎pandas/core/dtypes/concat.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎pandas/core/generic.py‎
Lines changed: 86 additions & 15 deletions b/‎pandas/core/generic.py‎
Lines changed: 86 additions & 15 deletions
diff --git a/‎pandas/core/reshape/merge.py‎
Lines changed: 4 additions & 4 deletions b/‎pandas/core/reshape/merge.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎pandas/core/series.py‎
Lines changed: 2 additions & 2 deletions b/‎pandas/core/series.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pandas/plotting/_matplotlib/timeseries.py‎
Lines changed: 2 additions & 2 deletions b/‎pandas/plotting/_matplotlib/timeseries.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pandas/tests/copy_view/test_functions.py‎
Lines changed: 23 additions & 0 deletions b/‎pandas/tests/copy_view/test_functions.py‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎pandas/tests/frame/methods/test_to_csv.py‎
Lines changed: 2 additions & 2 deletions b/‎pandas/tests/frame/methods/test_to_csv.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pandas/tests/groupby/test_groupby_dropna.py‎
Lines changed: 15 additions & 9 deletions b/‎pandas/tests/groupby/test_groupby_dropna.py‎
Lines changed: 15 additions & 9 deletions
diff --git a/‎pandas/tests/indexes/datetimes/methods/test_tz_localize.py‎
Lines changed: 4 additions & 4 deletions b/‎pandas/tests/indexes/datetimes/methods/test_tz_localize.py‎
Lines changed: 4 additions & 4 deletions
@@ -940,6 +940,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
 - Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
+- Performance improvement in :func:`merge` when join keys have different dtypes and need to be upcast (:issue:`62902`)
 - Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`)
 - Performance improvement in :meth:`DataFrame.__getitem__` when ``key`` is a :class:`DataFrame` with many columns (:issue:`61010`)
 - Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`)
@@ -1177,6 +1178,7 @@ Groupby/resample/rolling
 
 Reshaping
 ^^^^^^^^^
+- Bug in :func:`concat` with mixed integer and bool dtypes incorrectly casting the bools to integers (:issue:`45101`)
 - Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
 - Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
 - Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)
 
@@ -161,6 +161,10 @@ def _get_result_dtype(
                 # coerce to object
                 target_dtype = np.dtype(object)
                 kinds = {"o"}
+    elif "b" in kinds and len(kinds) > 1:
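+        # GH#21108, GH#45101: bool mixed with any other kind falls back to
+        # object so the bools are not silently cast to integers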
+        target_dtype = np.dtype(object)
+        kinds = {"o"}
     else:
         # error: Argument 1 to "np_find_common_type" has incompatible type
         # "*Set[Union[ExtensionDtype, Any]]"; expected "dtype[Any]"
 
@@ -8156,7 +8156,6 @@ def asof(self, where, subset=None):
     # ----------------------------------------------------------------------
     # Action Methods
 
-    @doc(klass=_shared_doc_kwargs["klass"])
     def isna(self) -> Self:
         """
         Detect missing values.
@@ -8169,15 +8168,18 @@ def isna(self) -> Self:
 
         Returns
         -------
-        {klass}
-            Mask of bool values for each element in {klass} that
-            indicates whether an element is an NA value.
+        Series/DataFrame
+            Mask of bool values for each element in Series/DataFrame
+            that indicates whether an element is an NA value.
 
         See Also
         --------
-        {klass}.isnull : Alias of isna.
-        {klass}.notna : Boolean inverse of isna.
-        {klass}.dropna : Omit axes labels with missing values.
+        Series.isnull : Alias of isna.
+        DataFrame.isnull : Alias of isna.
+        Series.notna : Boolean inverse of isna.
+        DataFrame.notna : Boolean inverse of isna.
+        Series.dropna : Omit axes labels with missing values.
+        DataFrame.dropna : Omit axes labels with missing values.
         isna : Top-level isna.
 
         Examples
@@ -8225,11 +8227,77 @@ def isna(self) -> Self:
         """
         return isna(self).__finalize__(self, method="isna")
 
-    @doc(isna, klass=_shared_doc_kwargs["klass"])
     def isnull(self) -> Self:
+        """
+        Detect missing values.
+
+        Return a boolean same-sized object indicating if the values are NA.
+        NA values, such as None or :attr:`numpy.NaN`, get mapped to True
+        values.
+        Everything else gets mapped to False values. Characters such as empty
+        strings ``''`` or :attr:`numpy.inf` are not considered NA values.
+
+        Returns
+        -------
+        Series/DataFrame
+            Mask of bool values for each element in Series/DataFrame
+            that indicates whether an element is an NA value.
+
+        See Also
+        --------
+        Series.isna : Alias of isnull.
+        DataFrame.isna : Alias of isnull.
+        Series.notna : Boolean inverse of isnull.
+        DataFrame.notna : Boolean inverse of isnull.
+        Series.dropna : Omit axes labels with missing values.
+        DataFrame.dropna : Omit axes labels with missing values.
+        isna : Top-level isna.
+
+        Examples
+        --------
+        Show which entries in a DataFrame are NA.
+
+        >>> df = pd.DataFrame(
+        ...     dict(
+        ...         age=[5, 6, np.nan],
+        ...         born=[
+        ...             pd.NaT,
+        ...             pd.Timestamp("1939-05-27"),
+        ...             pd.Timestamp("1940-04-25"),
+        ...         ],
+        ...         name=["Alfred", "Batman", ""],
+        ...         toy=[None, "Batmobile", "Joker"],
+        ...     )
+        ... )
+        >>> df
+           age       born    name        toy
+        0  5.0        NaT  Alfred        NaN
+        1  6.0 1939-05-27  Batman  Batmobile
+        2  NaN 1940-04-25              Joker
+
+        >>> df.isnull()
+             age   born   name    toy
+        0  False   True  False   True
+        1  False  False  False  False
+        2   True  False  False  False
+
+        Show which entries in a Series are NA.
+
+        >>> ser = pd.Series([5, 6, np.nan])
+        >>> ser
+        0    5.0
+        1    6.0
+        2    NaN
+        dtype: float64
+
+        >>> ser.isnull()
+        0    False
+        1    False
+        2     True
+        dtype: bool
+        """
         return isna(self).__finalize__(self, method="isnull")
 
-    @doc(klass=_shared_doc_kwargs["klass"])
     def notna(self) -> Self:
         """
         Detect existing (non-missing) values.
@@ -8242,15 +8310,18 @@ def notna(self) -> Self:
 
         Returns
         -------
-        {klass}
-            Mask of bool values for each element in {klass} that
-            indicates whether an element is not an NA value.
+        Series/DataFrame
+            Mask of bool values for each element in Series/DataFrame
+            that indicates whether an element is not an NA value.
 
         See Also
         --------
-        {klass}.notnull : Alias of notna.
-        {klass}.isna : Boolean inverse of notna.
-        {klass}.dropna : Omit axes labels with missing values.
+        Series.notnull : Alias of notna.
+        DataFrame.notnull : Alias of notna.
+        Series.isna : Boolean inverse of notna.
+        DataFrame.isna : Boolean inverse of notna.
+        Series.dropna : Omit axes labels with missing values.
+        DataFrame.dropna : Omit axes labels with missing values.
         notna : Top-level notna.
 
         Examples
 
@@ -1192,8 +1192,8 @@ def _indicator_pre_merge(
                 "Cannot use name of an existing column for indicator column"
             )
 
-        left = left.copy()
-        right = right.copy()
+        left = left.copy(deep=False)
+        right = right.copy(deep=False)
 
         left["_left_indicator"] = 1
         left["_left_indicator"] = left["_left_indicator"].astype("int8")
@@ -1871,11 +1871,11 @@ def _maybe_coerce_merge_keys(self) -> None:
             # incompatible dtypes. See GH 16900.
             if name in self.left.columns:
                 typ = cast(Categorical, lk).categories.dtype if lk_is_cat else object
-                self.left = self.left.copy()
+                self.left = self.left.copy(deep=False)
                 self.left[name] = self.left[name].astype(typ)
             if name in self.right.columns:
                 typ = cast(Categorical, rk).categories.dtype if rk_is_cat else object
-                self.right = self.right.copy()
+                self.right = self.right.copy(deep=False)
                 self.right[name] = self.right[name].astype(typ)
 
     def _validate_left_right_on(self, left_on, right_on):
 
@@ -6183,7 +6183,7 @@ def isna(self) -> Series:
         return NDFrame.isna(self)
 
     # error: Cannot determine type of 'isna'
-    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
     def isnull(self) -> Series:
         """
         Series.isnull is an alias for Series.isna.
@@ -6260,7 +6260,7 @@ def notna(self) -> Series:
         return super().notna()
 
     # error: Cannot determine type of 'notna'
-    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])
     def notnull(self) -> Series:
         """
         Series.notnull is an alias for Series.notna.
 
@@ -75,7 +75,7 @@ def maybe_resample(series: Series, ax: Axes, kwargs: dict[str, Any]):
 
     if ax_freq is not None and freq != ax_freq:
         if is_superperiod(freq, ax_freq):  # upsample input
-            series = series.copy()
+            series = series.copy(deep=False)
             # error: "Index" has no attribute "asfreq"
             series.index = series.index.asfreq(  # type: ignore[attr-defined]
                 ax_freq, how="s"
@@ -142,7 +142,7 @@ def _replot_ax(ax: Axes, freq: BaseOffset):
     labels = []
     if data is not None:
         for series, plotf, kwds in data:
-            series = series.copy()
+            series = series.copy(deep=False)
             idx = series.index.asfreq(freq, how="S")
             series.index = idx
             # TODO #54485
 
@@ -243,6 +243,29 @@ def test_merge_copy_keyword():
     assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
 
 
+def test_merge_upcasting_no_copy():
+    left = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    left_copy = left.copy()
+    right = DataFrame({"a": [1, 2, 3], "c": [7, 8, 9]}, dtype=object)
+    result = merge(left, right, on="a")
+    assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
+    assert not np.shares_memory(get_array(result, "a"), get_array(left, "a"))
+    tm.assert_frame_equal(left, left_copy)
+
+    result = merge(right, left, on="a")
+    assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
+    assert not np.shares_memory(get_array(result, "a"), get_array(left, "a"))
+    tm.assert_frame_equal(left, left_copy)
+
+
+def test_merge_indicator_no_deep_copy():
+    left = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    right = DataFrame({"a": [1, 2, 3], "c": [7, 8, 9]})
+    result = merge(left, right, on="a", indicator=True)
+    assert np.shares_memory(get_array(result, "b"), get_array(left, "b"))
+    assert np.shares_memory(get_array(result, "c"), get_array(right, "c"))
+
+
 @pytest.mark.parametrize("dtype", [object, "str"])
 def test_join_on_key(dtype):
     df_index = Index(["a", "b", "c"], name="key", dtype=dtype)
 
@@ -836,9 +836,9 @@ def test_to_csv_dups_cols2(self, temp_file):
         result = result.rename(columns={"a.1": "a"})
         tm.assert_frame_equal(result, df)
 
-    @pytest.mark.parametrize("chunksize", [10000, 50000, 100000])
+    @pytest.mark.parametrize("chunksize", [1, 5, 10])
     def test_to_csv_chunking(self, chunksize, temp_file):
-        aa = DataFrame({"A": range(100000)})
+        aa = DataFrame({"A": range(10)})
         aa["B"] = aa.A + 1.0
         aa["C"] = aa.A + 2.0
         aa["D"] = aa.A + 3.0
 
@@ -394,8 +394,20 @@ def test_groupby_drop_nan_with_multi_index():
     tm.assert_frame_equal(result, expected)
 
 
-# sequence_index enumerates all strings made up of x, y, z of length 4
-@pytest.mark.parametrize("sequence_index", range(3**4))
+# In each sequence, y > x and z is the missing value
+@pytest.mark.parametrize(
+    "sequence",
+    [
+        "xyzy",
+        "xxyz",
+        "yzxz",
+        "zzzz",
+        "zyzx",
+        "yyyy",
+        "zzxy",
+        "xyxy",
+    ],
+)
 @pytest.mark.parametrize(
     "dtype",
     [
@@ -419,15 +431,9 @@ def test_groupby_drop_nan_with_multi_index():
     ],
 )
 @pytest.mark.parametrize("test_series", [True, False])
-def test_no_sort_keep_na(sequence_index, dtype, test_series, as_index):
+def test_no_sort_keep_na(sequence, dtype, test_series, as_index):
     # GH#46584, GH#48794
 
-    # Convert sequence_index into a string sequence, e.g. 5 becomes "xxyz"
-    # This sequence is used for the grouper.
-    sequence = "".join(
-        [{0: "x", 1: "y", 2: "z"}[sequence_index // (3**k) % 3] for k in range(4)]
-    )
-
     # Unique values to use for grouper, depends on dtype
     if dtype in ("string", "string[pyarrow]"):
         uniques = {"x": "x", "y": "y", "z": pd.NA}
 
@@ -149,23 +149,23 @@ def test_dti_tz_localize_pass_dates_to_utc(self, tzstr):
     @pytest.mark.parametrize("prefix", ["", "dateutil/"])
     def test_dti_tz_localize(self, prefix):
         tzstr = prefix + "US/Eastern"
-        dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="ms")
+        dti = date_range(start="1/1/2005", end="1/1/2005 0:00:02.256", freq="ms")
         dti2 = dti.tz_localize(tzstr)
 
         dti_utc = date_range(
-            start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="ms", tz="utc"
+            start="1/1/2005 05:00", end="1/1/2005 5:00:02.256", freq="ms", tz="utc"
         )
 
         tm.assert_numpy_array_equal(dti2.values, dti_utc.values)
 
         dti3 = dti2.tz_convert(prefix + "US/Pacific")
         tm.assert_numpy_array_equal(dti3.values, dti_utc.values)
 
-        dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms")
+        dti = date_range(start="11/6/2011 1:59:59", end="11/6/2011 2:00", freq="ms")
         with pytest.raises(ValueError, match="Cannot infer dst time"):
             dti.tz_localize(tzstr)
 
-        dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms")
+        dti = date_range(start="3/13/2011 1:59:59", end="3/13/2011 2:00", freq="ms")
         with pytest.raises(ValueError, match="2011-03-13 02:00:00"):
             dti.tz_localize(tzstr)