|
73 | 73 | validate_indices, |
74 | 74 | ) |
75 | 75 | from pandas.core.nanops import check_below_min_count |
76 | | -from pandas.core.strings.base import BaseStringArrayMethods |
77 | 76 |
|
78 | 77 | from pandas.io._util import _arrow_dtype_mapping |
79 | 78 | from pandas.tseries.frequencies import to_offset |
@@ -237,7 +236,6 @@ class ArrowExtensionArray( |
237 | 236 | OpsMixin, |
238 | 237 | ExtensionArraySupportsAnyAll, |
239 | 238 | ArrowStringArrayMixin, |
240 | | - BaseStringArrayMethods, |
241 | 239 | ): |
242 | 240 | """ |
243 | 241 | Pandas ExtensionArray backed by a PyArrow ChunkedArray. |
@@ -392,6 +390,73 @@ def _from_sequence_of_strings( |
392 | 390 | ) |
393 | 391 | return cls._from_sequence(scalars, dtype=pa_type, copy=copy) |
394 | 392 |
|
    def _cast_pointwise_result(self, values) -> ArrayLike:
        """
        Wrap the raw output of a pointwise operation (e.g. ``map``/``apply``)
        back into an array, trying to retain a dtype consistent with ``self``.

        Parameters
        ----------
        values : sequence
            The Python-level results of the pointwise operation.

        Returns
        -------
        ArrayLike
            An Arrow-backed array when the values can be converted to a
            pyarrow array; otherwise falls back to the superclass behavior
            (numpy-based result).
        """
        if len(values) == 0:
            # Retain our dtype
            return self[:0].copy()

        try:
            arr = pa.array(values, from_pandas=True)
        except (ValueError, TypeError):
            # pyarrow cannot infer a type for these values, so defer to the
            # base-class (numpy-based) conversion instead.
            # e.g. test_by_column_values_with_same_starting_value with nested
            # values, one entry of which is an ArrowStringArray
            # or test_agg_lambda_complex128_dtype_conversion for complex values
            return super()._cast_pointwise_result(values)

        if pa.types.is_duration(arr.type):
            # workaround for https://github.com/apache/arrow/issues/40620
            # Re-infer through _from_sequence, then try to pick a duration
            # unit that matches what a non-pointwise op on self would give.
            result = ArrowExtensionArray._from_sequence(values)
            if pa.types.is_duration(self._pa_array.type):
                result = result.astype(self.dtype)  # type: ignore[assignment]
            elif pa.types.is_timestamp(self._pa_array.type):
                # Try to retain original unit
                new_dtype = ArrowDtype(pa.duration(self._pa_array.type.unit))
                try:
                    result = result.astype(new_dtype)  # type: ignore[assignment]
                except ValueError:
                    # Unit conversion failed; keep the inferred duration unit.
                    pass
            elif pa.types.is_date64(self._pa_array.type):
                # Try to match unit we get on non-pointwise op
                # NOTE: date64 is checked before the generic is_date branch
                # below, since is_date matches both date32 and date64.
                dtype = ArrowDtype(pa.duration("ms"))
                result = result.astype(dtype)  # type: ignore[assignment]
            elif pa.types.is_date(self._pa_array.type):
                # Try to match unit we get on non-pointwise op
                dtype = ArrowDtype(pa.duration("s"))
                result = result.astype(dtype)  # type: ignore[assignment]
            return result

        elif pa.types.is_date(arr.type) and pa.types.is_date(self._pa_array.type):
            # Same type family on both sides: cast back to our exact type.
            arr = arr.cast(self._pa_array.type)
        elif pa.types.is_time(arr.type) and pa.types.is_time(self._pa_array.type):
            arr = arr.cast(self._pa_array.type)
        elif pa.types.is_decimal(arr.type) and pa.types.is_decimal(self._pa_array.type):
            arr = arr.cast(self._pa_array.type)
        elif pa.types.is_integer(arr.type) and pa.types.is_integer(self._pa_array.type):
            try:
                arr = arr.cast(self._pa_array.type)
            except pa.lib.ArrowInvalid:
                # e.g. test_combine_add if we can't cast
                # (values may overflow our integer type); keep inferred type.
                pass
        elif pa.types.is_floating(arr.type) and pa.types.is_floating(
            self._pa_array.type
        ):
            try:
                arr = arr.cast(self._pa_array.type)
            except pa.lib.ArrowInvalid:
                # e.g. test_combine_add if we can't cast
                pass

        if isinstance(self.dtype, StringDtype):
            # String arrays have NA-semantics variants; pick the wrapping
            # that matches self.dtype rather than the plain ArrowEA default.
            if pa.types.is_string(arr.type) or pa.types.is_large_string(arr.type):
                # ArrowStringArrayNumpySemantics
                return type(self)(arr).astype(self.dtype)
            if self.dtype.na_value is np.nan:
                # ArrowEA has different semantics, so we return numpy-based
                # result instead
                return super()._cast_pointwise_result(values)
            return ArrowExtensionArray(arr)
        return type(self)(arr)
395 | 460 | @classmethod |
396 | 461 | def _box_pa( |
397 | 462 | cls, value, pa_type: pa.DataType | None = None |
|
0 commit comments