Skip to content

Commit 2c05626

Browse files
committed
Cleanup
1 parent 9c128d9 commit 2c05626

File tree

27 files changed

+85
-60
lines changed

27 files changed

+85
-60
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -390,9 +390,9 @@ and users can skip the check by explicitly specifying ``sort=True`` or
390390
``sort=False``.
391391

392392
This deprecation can also impact pandas' internal usage of :func:`concat`.
393-
While we have investigated uses of :func:`concat` to determine if this could lead
394-
to a change in behavior of other functions and methods in the API, it is
395-
possible some have been missed. In order to be cautious here, pandas has *not*
393+
Here, cases where :func:`concat` was sorting a :class:`DatetimeIndex` but not
394+
other indexes are considered bugs and have been fixed as noted below. However,
395+
it is possible some have been missed. In order to be cautious here, pandas has *not*
396396
added ``sort=False`` to any internal calls where we believe behavior should not change.
397397
If we have missed something, users will not experience a behavior change but they
398398
will receive a warning about :func:`concat` even though they are not directly
@@ -429,6 +429,14 @@ we may address any potential behavior changes.
429429
430430
pd.concat([df1, df2], axis=1, sort=False)
431431
432+
The following cases, in which pandas' internal usage of :func:`concat` resulted in inconsistent sorting,
433+
are now fixed in this release.
434+
435+
- :meth:`Series.apply` and :meth:`DataFrame.apply` with a list-like or dict-like ``func`` argument.
436+
- :meth:`Series.shift`, :meth:`DataFrame.shift`, :meth:`.SeriesGroupBy.shift`, :meth:`.DataFrameGroupBy.shift` when the ``periods`` argument is a list of length greater than 1.
437+
- :meth:`DataFrame.join` when ``other`` is a list of one or more Series or DataFrames and ``how="inner"``, ``how="left"``, or ``how="right"``.
438+
- :meth:`Series.str.cat` when ``others`` is a Series or DataFrame.
439+
432440
.. _whatsnew_300.api_breaking.value_counts_sorting:
433441

434442
Changed behavior in :meth:`DataFrame.value_counts` and :meth:`DataFrameGroupBy.value_counts` when ``sort=False``
@@ -1233,7 +1241,6 @@ Groupby/resample/rolling
12331241
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
12341242
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
12351243
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
1236-
- Bug in :meth:`DataFrameGroupBy.shift` where the resulting index would be sorted if the input is a :class:`DatetimeIndex` and multiple periods are specified (:issue:`62843`)
12371244
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
12381245
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
12391246
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)

pandas/core/apply.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def transform_dict_like(self, func) -> DataFrame:
382382
for name, how in func.items():
383383
colg = obj._gotitem(name, ndim=1)
384384
results[name] = colg.transform(how, 0, *args, **kwargs)
385-
return concat(results, axis=1) # nobug
385+
return concat(results, axis=1)
386386

387387
def transform_str_or_callable(self, func) -> DataFrame | Series:
388388
"""
@@ -485,7 +485,7 @@ def wrap_results_list_like(
485485
obj = self.obj
486486

487487
try:
488-
return concat(results, keys=keys, axis=1, sort=False) # nobug
488+
return concat(results, keys=keys, axis=1, sort=False)
489489
except TypeError as err:
490490
# we are concatting non-NDFrame objects,
491491
# e.g. a list of scalars
@@ -635,7 +635,7 @@ def wrap_results_dict_like(
635635
keys_to_use = ktu
636636

637637
axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1
638-
result = concat( # nobug
638+
result = concat(
639639
results,
640640
axis=axis,
641641
keys=keys_to_use,

pandas/core/arrays/arrow/accessors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,6 @@ def explode(self) -> DataFrame:
496496
from pandas import concat
497497

498498
pa_type = self._pa_array.type
499-
return concat( # nobug
499+
return concat(
500500
[self.field(i) for i in range(pa_type.num_fields)], axis="columns"
501501
)

pandas/core/arrays/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2690,7 +2690,7 @@ def describe(self) -> DataFrame:
26902690
from pandas import Index
26912691
from pandas.core.reshape.concat import concat
26922692

2693-
result = concat([counts, freqs], ignore_index=True, axis=1) # nobug
2693+
result = concat([counts, freqs], ignore_index=True, axis=1)
26942694
result.columns = Index(["counts", "freqs"])
26952695
result.index.name = "categories"
26962696

pandas/core/frame.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6130,7 +6130,7 @@ def shift(
61306130
.shift(periods=period, freq=freq, axis=axis, fill_value=fill_value)
61316131
.add_suffix(f"{suffix}_{period}" if suffix else f"_{period}")
61326132
)
6133-
return concat(shifted_dataframes, axis=1, sort=False) # nobug
6133+
return concat(shifted_dataframes, axis=1, sort=False)
61346134
elif suffix:
61356135
raise ValueError("Cannot specify `suffix` if `periods` is an int.")
61366136
periods = cast(int, periods)
@@ -11166,7 +11166,7 @@ def _append_internal(
1116611166

1116711167
from pandas.core.reshape.concat import concat
1116811168

11169-
result = concat( # nobug
11169+
result = concat(
1117011170
[self, row_df],
1117111171
ignore_index=ignore_index,
1117211172
)
@@ -11394,12 +11394,12 @@ def join(
1139411394
# join indexes only using concat
1139511395
if can_concat:
1139611396
if how == "left":
11397-
res = concat( # nobug
11397+
res = concat(
1139811398
frames, axis=1, join="outer", verify_integrity=True, sort=sort
1139911399
)
1140011400
return res.reindex(self.index)
1140111401
else:
11402-
return concat( # nobug
11402+
return concat(
1140311403
frames, axis=1, join=how, verify_integrity=True, sort=sort
1140411404
)
1140511405

@@ -11590,7 +11590,7 @@ def _series_round(ser: Series, decimals: int) -> Series:
1159011590
return self._constructor(
1159111591
concat(new_cols, axis=1),
1159211592
index=self.index,
11593-
columns=self.columns, # nobug
11593+
columns=self.columns,
1159411594
).__finalize__(self, method="round")
1159511595
else:
1159611596
return self.copy(deep=False)
@@ -14173,7 +14173,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
1417314173
from pandas.core.reshape.concat import concat
1417414174

1417514175
values = collections.defaultdict(list, values)
14176-
result = concat( # nobug
14176+
result = concat(
1417714177
(
1417814178
self.iloc[:, [i]].isin(values[col])
1417914179
for i, col in enumerate(self.columns)

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6525,7 +6525,7 @@ def astype(
65256525
return self.copy(deep=False)
65266526

65276527
# GH 19920: retain column metadata after concat
6528-
result = concat(results, axis=1) # nobug
6528+
result = concat(results, axis=1)
65296529
# GH#40810 retain subclass
65306530
# error: Incompatible types in assignment
65316531
# (expression has type "Self", variable has type "DataFrame")
@@ -9507,7 +9507,7 @@ def compare(
95079507

95089508
# error: List item 0 has incompatible type "NDFrame"; expected
95099509
# "Union[Series, DataFrame]"
9510-
diff = concat( # nobug - self and other must have same index/coluns
9510+
diff = concat(
95119511
[self, other], # type: ignore[list-item]
95129512
axis=axis,
95139513
keys=result_names,

pandas/core/groupby/generic.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
549549
if any(isinstance(x, DataFrame) for x in results.values()):
550550
from pandas import concat
551551

552-
res_df = concat( # nobug
552+
res_df = concat(
553553
results.values(), axis=1, keys=[key.label for key in results]
554554
)
555555
return res_df
@@ -722,7 +722,7 @@ def _transform_general(
722722
if results:
723723
from pandas.core.reshape.concat import concat
724724

725-
concatenated = concat(results, ignore_index=True) # nobug
725+
concatenated = concat(results, ignore_index=True)
726726
result = self._set_result_index_ordered(concatenated)
727727
else:
728728
result = self.obj._constructor(dtype=np.float64)
@@ -2238,7 +2238,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):
22382238
applied.append(res)
22392239

22402240
concat_index = obj.columns
2241-
concatenated = concat( # nobug
2241+
concatenated = concat(
22422242
applied, axis=0, verify_integrity=False, ignore_index=True
22432243
)
22442244
concatenated = concatenated.reindex(concat_index, axis=1)
@@ -2530,7 +2530,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
25302530
# concat would raise
25312531
res_df = DataFrame([], columns=columns, index=self._grouper.result_index)
25322532
else:
2533-
res_df = concat(results, keys=columns, axis=1) # nobug
2533+
res_df = concat(results, keys=columns, axis=1)
25342534

25352535
if not self.as_index:
25362536
res_df.index = default_index(len(res_df))
@@ -3390,9 +3390,7 @@ def _wrap_transform_general_frame(
33903390
# other dimension; this will preserve dtypes
33913391
# GH14457
33923392
if res.index.is_(obj.index):
3393-
res_frame = concat(
3394-
[res] * len(group.columns), axis=1, ignore_index=True
3395-
) # nobug
3393+
res_frame = concat([res] * len(group.columns), axis=1, ignore_index=True)
33963394
res_frame.columns = group.columns
33973395
res_frame.index = group.index
33983396
else:

pandas/core/groupby/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5238,7 +5238,7 @@ def shift(
52385238
return (
52395239
shifted_dataframes[0]
52405240
if len(shifted_dataframes) == 1
5241-
else concat(shifted_dataframes, axis=1, sort=False) # nobug
5241+
else concat(shifted_dataframes, axis=1, sort=False)
52425242
)
52435243

52445244
@final

pandas/core/indexes/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5379,9 +5379,9 @@ def append(self, other: Index | Sequence[Index]) -> Index:
53795379
names = {obj.name for obj in to_concat}
53805380
name = None if len(names) > 1 else self.name
53815381

5382-
return self._concat(to_concat, name) # nobug
5382+
return self._concat(to_concat, name)
53835383

5384-
def _concat(self, to_concat: list[Index], name: Hashable) -> Index: # nobug
5384+
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
53855385
"""
53865386
Concatenate multiple Index objects.
53875387
"""

pandas/core/indexes/range.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,7 +1181,7 @@ def insert(self, loc: int, item) -> Index:
11811181

11821182
return super().insert(loc, item)
11831183

1184-
def _concat(self, indexes: list[Index], name: Hashable) -> Index: # nobug
1184+
def _concat(self, indexes: list[Index], name: Hashable) -> Index:
11851185
"""
11861186
Overriding parent method for the case of all RangeIndex instances.
11871187
@@ -1191,7 +1191,7 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: # nobug
11911191
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64')
11921192
"""
11931193
if not all(isinstance(x, RangeIndex) for x in indexes):
1194-
result = super()._concat(indexes, name) # nobug
1194+
result = super()._concat(indexes, name)
11951195
if result.dtype.kind == "i":
11961196
return self._shallow_copy(result._values)
11971197
return result

0 commit comments

Comments
 (0)