diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index d673c507..ce7b574c 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -263,6 +263,7 @@ def __setitem__(self, key, value): value = [item.to(self.units).magnitude for item in value] key = convert_indexing_key(key) + try: self._data[key] = value except IndexError as e: @@ -620,6 +621,7 @@ def convert_values(param): return res op_name = f"__{op}__" + return set_function_name(_binop, op_name, cls) @classmethod diff --git a/pint_pandas/testsuite/test_pandas_interface.py b/pint_pandas/testsuite/test_pandas_interface.py index 9d4e47ae..b2710e68 100644 --- a/pint_pandas/testsuite/test_pandas_interface.py +++ b/pint_pandas/testsuite/test_pandas_interface.py @@ -7,6 +7,7 @@ import pint import pytest from pandas.core import ops +import pandas._testing as tm from pandas.tests.extension import base from pandas.tests.extension.conftest import ( # noqa: F401 as_array, @@ -192,23 +193,55 @@ def all_boolean_reductions(request): # ================================================================= -class TestCasting(base.BaseCastingTests): +class BasePintPandas: + # pandas assert_series_equal (which calls assert_almost_equal) gets + # confused by Pint's duck-typing. + # We work around this by comparing pint columns one Series at a time + @classmethod + def assert_series_equal(cls, left, right, *args, **kwargs): + # casting etc. 
can be done here if helpful + tm.assert_series_equal(left, right, *args, **kwargs) + + @classmethod + def assert_frame_equal(cls, left, right, *args, **kwargs): + obj_type = kwargs.get("obj", "DataFrame") + tm.assert_index_equal( + left.columns, + right.columns, + exact=kwargs.get("check_column_type", "equiv"), + check_names=kwargs.get("check_names", True), + check_exact=kwargs.get("check_exact", False), + check_categorical=kwargs.get("check_categorical", True), + obj=f"{obj_type}.columns", + ) + pints = left.dtypes.map(lambda x: str(x).startswith("pint")) + pints = pints[pints].index + + for col in pints: + cls.assert_series_equal(left[col], right[col], *args, **kwargs) + + left = left.drop(columns=pints) + right = right.drop(columns=pints) + tm.assert_frame_equal(left, right, *args, **kwargs) + + +class TestCasting(BasePintPandas, base.BaseCastingTests): pass -class TestConstructors(base.BaseConstructorsTests): +class TestConstructors(BasePintPandas, base.BaseConstructorsTests): pass -class TestDtype(base.BaseDtypeTests): +class TestDtype(BasePintPandas, base.BaseDtypeTests): pass -class TestGetitem(base.BaseGetitemTests): +class TestGetitem(BasePintPandas, base.BaseGetitemTests): pass -class TestGroupby(base.BaseGroupbyTests): +class TestGroupby(BasePintPandas, base.BaseGroupbyTests): @pytest.mark.xfail( run=True, reason="pintarrays seem not to be numeric in one version of pd" ) @@ -229,7 +262,6 @@ def test_in_numeric_groupby(self, data_for_grouping): self.assert_index_equal(result, expected) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_groupby_apply_identity(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) result = df.groupby("A").B.apply(lambda x: x.array) @@ -246,11 +278,11 @@ def test_groupby_apply_identity(self, data_for_grouping): self.assert_series_equal(result, expected) -class TestInterface(base.BaseInterfaceTests): +class TestInterface(BasePintPandas, 
base.BaseInterfaceTests): pass -class TestMethods(base.BaseMethodsTests): +class TestMethods(BasePintPandas, base.BaseMethodsTests): @pytest.mark.filterwarnings("ignore::pint.UnitStrippedWarning") # See test_setitem_mask_broadcast note @pytest.mark.parametrize("dropna", [True, False]) @@ -279,7 +311,6 @@ def test_unique(self, data, box, method): assert isinstance(result, type(data)) assert result[0] == duplicated[0] - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_fillna_copy_frame(self, data_missing): arr = data_missing.take([1, 1]) df = pd.DataFrame({"A": arr}) @@ -289,7 +320,6 @@ def test_fillna_copy_frame(self, data_missing): assert df.A.values is not result.A.values - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_fillna_copy_series(self, data_missing): arr = data_missing.take([1, 1]) ser = pd.Series(arr) @@ -300,7 +330,6 @@ def test_fillna_copy_series(self, data_missing): assert ser._values is not result._values assert ser._values is arr - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_searchsorted(self, data_for_sorting, as_series): # noqa: F811 b, c, a = data_for_sorting arr = type(data_for_sorting)._from_sequence([a, b, c]) @@ -319,13 +348,13 @@ def test_searchsorted(self, data_for_sorting, as_series): # noqa: F811 result = arr.searchsorted(arr.take([0, 2])) expected = np.array([0, 2], dtype=np.intp) - self.assert_numpy_array_equal(result, expected) + np.testing.assert_array_equal(result, expected) # sorter sorter = np.array([1, 2, 0]) assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") + @pytest.mark.xfail(run=True, reason="pandas lib.is_scalar issue") def test_where_series(self, data, na_value, as_frame): # noqa: F811 assert data[0] != data[1] cls = type(data) @@ -360,7 +389,7 @@ def test_where_series(self, data, na_value, as_frame): # noqa: F811 self.assert_equal(result, expected) -class 
TestArithmeticOps(base.BaseArithmeticOpsTests): +class TestArithmeticOps(BasePintPandas, base.BaseArithmeticOpsTests): def check_opname(self, s, op_name, other, exc=None): op = self.get_op_from_name(op_name) @@ -401,7 +430,11 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): s = pd.Series(data) self.check_opname(s, op_name, s.iloc[0], exc=exc) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") + @pytest.mark.xfail(run=True, reason=( + "operating with quantity of different registries " + "OR reverse operation and pint has line `if zero_or_nan(other, True)` which explodes " + "OR multiplying dimensionless and units explodes" + )) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar op_name, exc = self._get_exception(data, all_arithmetic_operators) @@ -457,7 +490,7 @@ def test_error(self, data, all_arithmetic_operators): opa(np.arange(len(s)).reshape(-1, len(s))) -class TestComparisonOps(base.BaseComparisonOpsTests): +class TestComparisonOps(BasePintPandas, base.BaseComparisonOpsTests): def _compare_other(self, s, data, op_name, other): op = self.get_op_from_name(op_name) @@ -480,19 +513,17 @@ def test_compare_array(self, data, all_compare_operators): self._compare_other(s, data, op_name, other) -class TestOpsUtil(base.BaseOpsUtil): +class TestOpsUtil(BasePintPandas, base.BaseOpsUtil): pass -class TestMissing(base.BaseMissingTests): - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") +class TestMissing(BasePintPandas, base.BaseMissingTests): def test_fillna_scalar(self, data_missing): valid = data_missing[1] result = data_missing.fillna(valid) expected = data_missing.fillna(valid) self.assert_extension_array_equal(result, expected) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_fillna_series(self, data_missing): fill_value = data_missing[1] ser = pd.Series(data_missing) @@ -527,10 +558,10 @@ def test_fillna_frame(self, data_missing): "B": [1, 2], } ) 
- self.assert_series_equal(result, expected) + self.assert_frame_equal(result, expected) -class TestNumericReduce(base.BaseNumericReduceTests): +class TestNumericReduce(BasePintPandas, base.BaseNumericReduceTests): def check_reduce(self, s, op_name, skipna): result = getattr(s, op_name)(skipna=skipna) expected_m = getattr(pd.Series(s.values.quantity._magnitude), op_name)( @@ -541,7 +572,7 @@ def check_reduce(self, s, op_name, skipna): assert result == expected -class TestBooleanReduce(base.BaseBooleanReduceTests): +class TestBooleanReduce(BasePintPandas, base.BaseBooleanReduceTests): def check_reduce(self, s, op_name, skipna): result = getattr(s, op_name)(skipna=skipna) expected = getattr(pd.Series(s.values.quantity._magnitude), op_name)( @@ -550,8 +581,27 @@ def check_reduce(self, s, op_name, skipna): assert result == expected -class TestReshaping(base.BaseReshapingTests): - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") +class TestReshaping(BasePintPandas, base.BaseReshapingTests): + @pytest.mark.parametrize( + "index", + [ + # Two levels, uniform. 
+ pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]), + # non-uniform + pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]), + # three levels, non-uniform + pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]), + pd.MultiIndex.from_tuples( + [ + ("A", "a", 1), + ("A", "b", 0), + ("A", "a", 0), + ("B", "a", 0), + ("B", "c", 1), + ] + ), + ], + ) @pytest.mark.parametrize("obj", ["series", "frame"]) def test_unstack(self, data, index, obj): data = data[: len(index)] @@ -581,20 +631,21 @@ def test_unstack(self, data, index, obj): alt = df.unstack(level=level).droplevel(0, axis=1) self.assert_frame_equal(result, alt) - expected = ser.astype(object).unstack(level=level) - result = result.astype(object) + expected = ser.unstack(level=level, fill_value=data.dtype.na_value) + # convert to common pint datatype for comparisons rather than + # object which causes pandas' assert_almost_equal to explode + result = result.astype(data.dtype) self.assert_frame_equal(result, expected) -class TestSetitem(base.BaseSetitemTests): +class TestSetitem(BasePintPandas, base.BaseSetitemTests): @pytest.mark.parametrize("setter", ["loc", None]) @pytest.mark.filterwarnings("ignore::pint.UnitStrippedWarning") # Pandas performs a hasattr(__array__), which triggers the warning # Debugging it does not pass through a PintArray, so # I think this needs changing in pint quantity # eg s[[True]*len(s)]=Q_(1,"m") - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_setitem_mask_broadcast(self, data, setter): ser = pd.Series(data) mask = np.zeros(len(data), dtype=bool) @@ -609,7 +660,6 @@ def test_setitem_mask_broadcast(self, data, setter): assert ser[0] == data[10] assert ser[1] == data[10] - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_setitem_sequence_broadcasts(self, data, box_in_series): if box_in_series: data = pd.Series(data) @@ -617,7 +667,6 @@ def test_setitem_sequence_broadcasts(self, data, 
box_in_series): assert data[0] == data[2] assert data[1] == data[2] - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") @pytest.mark.parametrize( "idx", [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], @@ -634,7 +683,6 @@ def test_setitem_integer_array(self, data, idx, box_in_series): arr[idx] = arr[0] self.assert_equal(arr, expected) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_setitem_slice(self, data, box_in_series): arr = data[:5].copy() expected = data.take([0, 0, 0, 3, 4]) @@ -645,7 +693,6 @@ def test_setitem_slice(self, data, box_in_series): arr[:3] = data[0] self.assert_equal(arr, expected) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_setitem_loc_iloc_slice(self, data): arr = data[:5].copy() s = pd.Series(arr, index=["a", "b", "c", "d", "e"]) @@ -660,7 +707,7 @@ def test_setitem_loc_iloc_slice(self, data): self.assert_equal(result, expected) -class TestOffsetUnits(object): +class TestOffsetUnits: def test_offset_concat(): a = pd.Series(PintArray(range(5), ureg.Unit("degC"))) b = pd.Series(PintArray(range(6), ureg.Unit("degC"))) @@ -672,7 +719,7 @@ def test_offset_concat(): # but this isn't a discussion we've had yet -class TestUserInterface(object): +class TestUserInterface: def test_get_underlying_data(self, data): ser = pd.Series(data) # this first test creates an array of bool (which is desired, eg for indexing) @@ -736,7 +783,7 @@ def test_df_operations(self): df_.pint.to_base_units().pint.dequantify() -class TestDataFrameAccessor(object): +class TestDataFrameAccessor: def test_index_maintained(self): test_csv = join(dirname(__file__), "pandas_test.csv") @@ -784,7 +831,7 @@ def get_pint_value(in_str): pd.testing.assert_frame_equal(result, expected) -class TestSeriesAccessors(object): +class TestSeriesAccessors: @pytest.mark.parametrize( "attr", [