Cleanup

rhshadrach · rhshadrach · commit 2c0562628719 · 2025-11-01T13:09:27.000-04:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -390,9 +390,9 @@ and users can skip the check by explicitly specifying ``sort=True`` or
 ``sort=False``.
 
 This deprecation can also impact pandas' internal usage of :func:`concat`.
-While we have investigated uses of :func:`concat` to determine if this could lead
-to a change in behavior of other functions and methods in the API, it is
-possible some have been missed. In order to be cautious here, pandas has *not*
+Here, cases where :func:`concat` was sorting a :class:`DatetimeIndex` but not
+other indexes are considered bugs and have been fixed as noted below. However,
+it is possible some have been missed. In order to be cautious here, pandas has *not*
 added ``sort=False`` to any internal calls where we believe behavior should not change.
 If we have missed something, users will not experience a behavior change but they
 will receive a warning about :func:`concat` even though they are not directly
@@ -429,6 +429,14 @@ we may address any potential behavior changes.
 
     pd.concat([df1, df2], axis=1, sort=False)
 
+The following cases, where pandas' internal usage of :func:`concat` resulted in
+inconsistent sorting, are fixed in this release:
+
+- :meth:`Series.apply` and :meth:`DataFrame.apply` with a list-like or dict-like ``func`` argument.
+- :meth:`Series.shift`, :meth:`DataFrame.shift`, :meth:`.SeriesGroupBy.shift`, :meth:`.DataFrameGroupBy.shift` when the ``periods`` argument is a list of length greater than 1.
+- :meth:`DataFrame.join` when ``other`` is a list of one or more Series or DataFrames and ``how="inner"``, ``how="left"``, or ``how="right"``.
+- :meth:`Series.str.cat` when ``others`` is a Series or DataFrame.
+
 .. _whatsnew_300.api_breaking.value_counts_sorting:
 
 Changed behavior in :meth:`DataFrame.value_counts` and :meth:`DataFrameGroupBy.value_counts` when ``sort=False``
@@ -1233,7 +1241,6 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
-- Bug in :meth:`DataFrameGroupBy.shift` where the resulting index would be sorted if the input is a :class:`DatetimeIndex` and multiple periods are specified (:issue:`62843`)
 - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
 - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -382,7 +382,7 @@ def transform_dict_like(self, func) -> DataFrame:
         for name, how in func.items():
             colg = obj._gotitem(name, ndim=1)
             results[name] = colg.transform(how, 0, *args, **kwargs)
-        return concat(results, axis=1)  # nobug
+        return concat(results, axis=1)
 
     def transform_str_or_callable(self, func) -> DataFrame | Series:
         """
@@ -485,7 +485,7 @@ def wrap_results_list_like(
         obj = self.obj
 
         try:
-            return concat(results, keys=keys, axis=1, sort=False)  # nobug
+            return concat(results, keys=keys, axis=1, sort=False)
         except TypeError as err:
             # we are concatting non-NDFrame objects,
             # e.g. a list of scalars
@@ -635,7 +635,7 @@ def wrap_results_dict_like(
                 keys_to_use = ktu
 
             axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1
-            result = concat(  # nobug
+            result = concat(
                 results,
                 axis=axis,
                 keys=keys_to_use,
diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py
@@ -496,6 +496,6 @@ def explode(self) -> DataFrame:
         from pandas import concat
 
         pa_type = self._pa_array.type
-        return concat(  # nobug
+        return concat(
             [self.field(i) for i in range(pa_type.num_fields)], axis="columns"
         )
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -2690,7 +2690,7 @@ def describe(self) -> DataFrame:
         from pandas import Index
         from pandas.core.reshape.concat import concat
 
-        result = concat([counts, freqs], ignore_index=True, axis=1)  # nobug
+        result = concat([counts, freqs], ignore_index=True, axis=1)
         result.columns = Index(["counts", "freqs"])
         result.index.name = "categories"
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6130,7 +6130,7 @@ def shift(
                     .shift(periods=period, freq=freq, axis=axis, fill_value=fill_value)
                     .add_suffix(f"{suffix}_{period}" if suffix else f"_{period}")
                 )
-            return concat(shifted_dataframes, axis=1, sort=False)  # nobug
+            return concat(shifted_dataframes, axis=1, sort=False)
         elif suffix:
             raise ValueError("Cannot specify `suffix` if `periods` is an int.")
         periods = cast(int, periods)
@@ -11166,7 +11166,7 @@ def _append_internal(
 
         from pandas.core.reshape.concat import concat
 
-        result = concat(  # nobug
+        result = concat(
             [self, row_df],
             ignore_index=ignore_index,
         )
@@ -11394,12 +11394,12 @@ def join(
             # join indexes only using concat
             if can_concat:
                 if how == "left":
-                    res = concat(  # nobug
+                    res = concat(
                         frames, axis=1, join="outer", verify_integrity=True, sort=sort
                     )
                     return res.reindex(self.index)
                 else:
-                    return concat(  # nobug
+                    return concat(
                         frames, axis=1, join=how, verify_integrity=True, sort=sort
                     )
 
@@ -11590,7 +11590,7 @@ def _series_round(ser: Series, decimals: int) -> Series:
             return self._constructor(
                 concat(new_cols, axis=1),
                 index=self.index,
-                columns=self.columns,  # nobug
+                columns=self.columns,
             ).__finalize__(self, method="round")
         else:
             return self.copy(deep=False)
@@ -14173,7 +14173,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
             from pandas.core.reshape.concat import concat
 
             values = collections.defaultdict(list, values)
-            result = concat(  # nobug
+            result = concat(
                 (
                     self.iloc[:, [i]].isin(values[col])
                     for i, col in enumerate(self.columns)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6525,7 +6525,7 @@ def astype(
             return self.copy(deep=False)
 
         # GH 19920: retain column metadata after concat
-        result = concat(results, axis=1)  # nobug
+        result = concat(results, axis=1)
         # GH#40810 retain subclass
         # error: Incompatible types in assignment
         # (expression has type "Self", variable has type "DataFrame")
@@ -9507,7 +9507,7 @@ def compare(
 
         # error: List item 0 has incompatible type "NDFrame"; expected
         #  "Union[Series, DataFrame]"
-        diff = concat(  # nobug - self and other must have same index/coluns
+        diff = concat(
             [self, other],  # type: ignore[list-item]
             axis=axis,
             keys=result_names,
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -549,7 +549,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
         if any(isinstance(x, DataFrame) for x in results.values()):
             from pandas import concat
 
-            res_df = concat(  # nobug
+            res_df = concat(
                 results.values(), axis=1, keys=[key.label for key in results]
             )
             return res_df
@@ -722,7 +722,7 @@ def _transform_general(
         if results:
             from pandas.core.reshape.concat import concat
 
-            concatenated = concat(results, ignore_index=True)  # nobug
+            concatenated = concat(results, ignore_index=True)
             result = self._set_result_index_ordered(concatenated)
         else:
             result = self.obj._constructor(dtype=np.float64)
@@ -2238,7 +2238,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):
             applied.append(res)
 
         concat_index = obj.columns
-        concatenated = concat(  # nobug
+        concatenated = concat(
             applied, axis=0, verify_integrity=False, ignore_index=True
         )
         concatenated = concatenated.reindex(concat_index, axis=1)
@@ -2530,7 +2530,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
             # concat would raise
             res_df = DataFrame([], columns=columns, index=self._grouper.result_index)
         else:
-            res_df = concat(results, keys=columns, axis=1)  # nobug
+            res_df = concat(results, keys=columns, axis=1)
 
         if not self.as_index:
             res_df.index = default_index(len(res_df))
@@ -3390,9 +3390,7 @@ def _wrap_transform_general_frame(
         # other dimension; this will preserve dtypes
         # GH14457
         if res.index.is_(obj.index):
-            res_frame = concat(
-                [res] * len(group.columns), axis=1, ignore_index=True
-            )  # nobug
+            res_frame = concat([res] * len(group.columns), axis=1, ignore_index=True)
             res_frame.columns = group.columns
             res_frame.index = group.index
         else:
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -5238,7 +5238,7 @@ def shift(
         return (
             shifted_dataframes[0]
             if len(shifted_dataframes) == 1
-            else concat(shifted_dataframes, axis=1, sort=False)  # nobug
+            else concat(shifted_dataframes, axis=1, sort=False)
         )
 
     @final
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -5379,9 +5379,9 @@ def append(self, other: Index | Sequence[Index]) -> Index:
         names = {obj.name for obj in to_concat}
         name = None if len(names) > 1 else self.name
 
-        return self._concat(to_concat, name)  # nobug
+        return self._concat(to_concat, name)
 
-    def _concat(self, to_concat: list[Index], name: Hashable) -> Index:  # nobug
+    def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
         """
         Concatenate multiple Index objects.
         """
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -1181,7 +1181,7 @@ def insert(self, loc: int, item) -> Index:
 
         return super().insert(loc, item)
 
-    def _concat(self, indexes: list[Index], name: Hashable) -> Index:  # nobug
+    def _concat(self, indexes: list[Index], name: Hashable) -> Index:
         """
         Overriding parent method for the case of all RangeIndex instances.
 
@@ -1191,7 +1191,7 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index:  # nobug
         indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64')
         """
         if not all(isinstance(x, RangeIndex) for x in indexes):
-            result = super()._concat(indexes, name)  # nobug
+            result = super()._concat(indexes, name)
             if result.dtype.kind == "i":
                 return self._shallow_copy(result._values)
             return result
diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py
@@ -144,9 +144,7 @@ def _from_dataframe(df: DataFrameXchg, allow_copy: bool = True) -> pd.DataFrame:
     elif len(pandas_dfs) == 1:
         pandas_df = pandas_dfs[0]
     else:
-        pandas_df = pd.concat(
-            pandas_dfs, axis=0, ignore_index=True, copy=False
-        )  # nobug
+        pandas_df = pd.concat(pandas_dfs, axis=0, ignore_index=True, copy=False)
 
     index_obj = df.metadata.get("pandas.index", None)
     if index_obj is not None:
diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py
@@ -173,7 +173,7 @@ def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame:
 
         col_names = reorder_columns(ldesc)
         d = concat(
-            [x.reindex(col_names) for x in ldesc],  # nobug
+            [x.reindex(col_names) for x in ldesc],
             axis=1,
             ignore_index=True,
             sort=False,
diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py
@@ -186,7 +186,7 @@ def compute(self, method: str) -> Series:
             # reverse indices
             inds = narr - 1 - inds
 
-        result = concat([dropped.iloc[inds], nan_index]).iloc[:findex]  # nobug
+        result = concat([dropped.iloc[inds], nan_index]).iloc[:findex]
         result.index = original_index.take(result.index)
         return result
 
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -1025,7 +1025,7 @@ def interpolate(
 
             missing_data_points_index = obj.index.difference(final_index)
             if len(missing_data_points_index) > 0:
-                result = concat(  # nobug
+                result = concat(
                     [result, obj.loc[missing_data_points_index]]
                 ).sort_index()
 
diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
@@ -223,7 +223,7 @@ def check_len(item, name: str) -> None:
                 dtype=dtype,
             )
             with_dummies.append(dummy)
-        result = concat(with_dummies, axis=1)  # nobug
+        result = concat(with_dummies, axis=1)
     else:
         result = _get_dummies_1d(
             data,
@@ -342,7 +342,7 @@ def get_empty_frame(data) -> DataFrame:
             )
             sparse_series.append(Series(data=sarr, index=index, name=col, copy=False))
 
-        return concat(sparse_series, axis=1)  # nobug
+        return concat(sparse_series, axis=1)
 
     else:
         # ensure ndarray layout is column-major
@@ -568,7 +568,7 @@ def from_dummies(
                     "Dummy DataFrame contains unassigned value(s); "
                     f"First instance in row: {assigned.idxmin()}"
                 )
-            data_slice = concat(  # nobug
+            data_slice = concat(
                 (data_to_decode.loc[:, prefix_slice], assigned == 0), axis=1
             )
         else:
diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py
@@ -249,9 +249,7 @@ def melt(
         if not isinstance(id_data.dtype, np.dtype):
             # i.e. ExtensionDtype
             if num_cols_adjusted > 0:
-                mdata[col] = concat(
-                    [id_data] * num_cols_adjusted, ignore_index=True
-                )  # nobug
+                mdata[col] = concat([id_data] * num_cols_adjusted, ignore_index=True)
             else:
                 # We can't concat empty list. (GH 46044)
                 mdata[col] = type(id_data)([], name=id_data.name, dtype=id_data.dtype)
@@ -263,7 +261,7 @@ def melt(
     if frame.shape[1] > 0 and not any(
         not isinstance(dt, np.dtype) and dt._supports_2d for dt in frame.dtypes
     ):
-        mdata[value_name] = concat(  # nobug
+        mdata[value_name] = concat(
             [frame.iloc[:, i] for i in range(frame.shape[1])], ignore_index=True
         ).values
     else:
@@ -668,7 +666,7 @@ def melt_stub(df, stub: str, i, j, value_vars, sep: str):
         value_vars_flattened.extend(value_var)
         _melted.append(melt_stub(df, stub, i, j, value_var, sep))
 
-    melted = concat(_melted, axis=1)  # nobug
+    melted = concat(_melted, axis=1)
     id_vars = df.columns.difference(value_vars_flattened)
     new = df[id_vars]
 
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -507,7 +507,7 @@ def _groupby_and_merge(
     # if we have a missing piece this can be reset
     from pandas.core.reshape.concat import concat
 
-    result = concat(pieces, ignore_index=True)  # nobug
+    result = concat(pieces, ignore_index=True)
     result = result.reindex(columns=pieces[0].columns)
     return result, lby
 
@@ -1131,7 +1131,7 @@ def _reindex_and_concat(
 
         left.columns = llabels
         right.columns = rlabels
-        result = concat([left, right], axis=1)  # nobug
+        result = concat([left, right], axis=1)
         return result
 
     def get_result(self) -> DataFrame:
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -263,7 +263,7 @@ def pivot_table(
             pieces.append(_table)
             keys.append(getattr(func, "__name__", func))
 
-        table = concat(pieces, keys=keys, axis=1)  # nobug
+        table = concat(pieces, keys=keys, axis=1)
         return table.__finalize__(data, method="pivot_table")
 
     table = __internal_pivot_table(
@@ -506,7 +506,7 @@ def _add_margins(
         margin_dummy[cols] = margin_dummy[cols].apply(
             maybe_downcast_to_dtype, args=(dtype,)
         )
-    result = concat([result, margin_dummy])  # nobug
+    result = concat([result, margin_dummy])
     result.index.names = row_names
 
     return result
@@ -608,7 +608,7 @@ def _all_key(key):
             # GH 49240
             return table
         else:
-            result = concat(table_pieces, axis=cat_axis)  # nobug
+            result = concat(table_pieces, axis=cat_axis)
 
         if len(rows) == 0:
             return result
@@ -1185,7 +1185,7 @@ def _normalize(
         # Fix Margins
         if normalize == "columns":
             column_margin = column_margin / column_margin.sum()
-            table = concat([table, column_margin], axis=1)  # nobug
+            table = concat([table, column_margin], axis=1)
             table = table.fillna(0)
             table.columns = table_columns
 
@@ -1199,7 +1199,7 @@ def _normalize(
             column_margin = column_margin / column_margin.sum()
             index_margin = index_margin / index_margin.sum()
             index_margin.loc[margins_name] = 1
-            table = concat([table, column_margin], axis=1)  # nobug
+            table = concat([table, column_margin], axis=1)
             table = table._append_internal(index_margin, ignore_index=True)
 
             table = table.fillna(0)
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
diff --git a/pandas/core/series.py b/pandas/core/series.py
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py

Original file line number	Diff line number	Diff line change
`@@ -496,6 +496,6 @@ def explode(self) -> DataFrame:`
`496`	`496`	`from pandas import concat`
`497`	`497`
`498`	`498`	`pa_type = self._pa_array.type`
`499`		`- return concat( # nobug`
	`499`	`+ return concat(`
`500`	`500`	`[self.field(i) for i in range(pa_type.num_fields)], axis="columns"`
`501`	`501`	`)`
Original file line number	Diff line number	Diff line change
`@@ -5238,7 +5238,7 @@ def shift(`
`5238`	`5238`	`return (`
`5239`	`5239`	`shifted_dataframes[0]`
`5240`	`5240`	`if len(shifted_dataframes) == 1`
`5241`		`- else concat(shifted_dataframes, axis=1, sort=False) # nobug`
	`5241`	`+ else concat(shifted_dataframes, axis=1, sort=False)`
`5242`	`5242`	`)`
`5243`	`5243`
`5244`	`5244`	`@final`