|
2 | 2 |
|
3 | 3 | import pytest |
4 | 4 |
|
| 5 | +import pandas.util._test_decorators as td |
| 6 | + |
5 | 7 | import pandas as pd |
| 8 | +import pandas._testing as tm |
6 | 9 |
|
7 | 10 | pa = pytest.importorskip("pyarrow", minversion="16.0") |
8 | 11 |
|
@@ -59,3 +62,56 @@ def test_series_arrow_interface_stringdtype(): |
59 | 62 | ca = pa.chunked_array(s) |
60 | 63 | expected = pa.chunked_array([["foo", "bar"]], type=pa.large_string()) |
61 | 64 | assert ca.equals(expected) |
| 65 | + |
| 66 | + |
class ArrowArrayWrapper:
    """Minimal non-pyarrow object that exposes only ``__arrow_c_array__``."""

    def __init__(self, array):
        # Keep a reference to the wrapped object; the protocol call is
        # forwarded to it unchanged.
        self.array = array

    def __arrow_c_array__(self, requested_schema=None):
        """Delegate to the wrapped object's ``__arrow_c_array__``."""
        wrapped = self.array
        exported = wrapped.__arrow_c_array__(requested_schema)
        return exported
| 73 | + |
| 74 | + |
class ArrowStreamWrapper:
    """Minimal non-pyarrow object that exposes only ``__arrow_c_stream__``."""

    def __init__(self, chunked_array):
        # The wrapped object is stored under ``stream``; the protocol call is
        # forwarded to it unchanged.
        self.stream = chunked_array

    def __arrow_c_stream__(self, requested_schema=None):
        """Delegate to the wrapped object's ``__arrow_c_stream__``."""
        wrapped = self.stream
        exported = wrapped.__arrow_c_stream__(requested_schema)
        return exported
| 81 | + |
| 82 | + |
@td.skip_if_no("pyarrow", min_version="14.0")
def test_dataframe_from_arrow():
    """
    Exercise ``pd.Series.from_arrow`` with stream, table and array inputs.

    NOTE(review): the name says "dataframe" but every call below goes through
    ``pd.Series.from_arrow``.
    """
    # Inputs exposing __arrow_c_stream__: a pyarrow ChunkedArray and a plain
    # Python wrapper that only forwards the protocol method (so not only
    # pyarrow objects are supported).
    chunked = pa.chunked_array([[1, 2, 3], [4, 5]])
    flat_expected = pd.Series([1, 2, 3, 4, 5])
    for stream_source in (chunked, ArrowStreamWrapper(chunked)):
        tm.assert_series_equal(pd.Series.from_arrow(stream_source), flat_expected)

    # A Table works as well, but is seen as a StructArray, so each row is
    # expected to come back as a dict.
    tbl = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    struct_expected = pd.Series(
        [{"a": 1, "b": "a"}, {"a": 2, "b": "b"}, {"a": 3, "b": "c"}]
    )
    tm.assert_series_equal(pd.Series.from_arrow(tbl), struct_expected)

    # Inputs exposing __arrow_c_array__, again both native and wrapped.
    contiguous = pa.array([1, 2, 3])
    array_expected = pd.Series([1, 2, 3])
    for array_source in (contiguous, ArrowArrayWrapper(contiguous)):
        tm.assert_series_equal(pd.Series.from_arrow(array_source), array_expected)

    # Only actual Arrow objects are accepted; a plain list must raise.
    error_pattern = "Expected an Arrow-compatible array-like object"
    with pytest.raises(TypeError, match=error_pattern):
        pd.Series.from_arrow([1, 2, 3])
0 commit comments