From d6a94fa96eccb76cf36383ea437351eae85e7683 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Wed, 8 Jul 2020 13:24:55 +1000 Subject: [PATCH 1/5] Determine where pandas needs different is_list_like etc. --- pint_pandas/pint_array.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index d673c507..8db891ab 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -262,7 +262,15 @@ def __setitem__(self, key, value): elif is_list_like(value) and isinstance(value[0], _Quantity): value = [item.to(self.units).magnitude for item in value] + # _is_scalar = is_scalar(value) + # if _is_scalar: + # value = [value] + # # why the same if clause again? + # if _is_scalar: + # value = value[0] + key = convert_indexing_key(key) + try: self._data[key] = value except IndexError as e: From 9edf19ea939893ee2286a17a895ef00d0aa935de Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 16 Feb 2021 11:29:52 +1100 Subject: [PATCH 2/5] One more fix --- pint_pandas/pint_array.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 8db891ab..9a261f5f 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -628,6 +628,8 @@ def convert_values(param): return res op_name = f"__{op}__" + # op_name = f"__{op.__name__.strip('_')}__" + return set_function_name(_binop, op_name, cls) @classmethod From e9db40efde1ab7fd8f6b48072d8f990630609efb Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 16 Feb 2021 13:21:21 +1100 Subject: [PATCH 3/5] Fix iter issues Requires https://github.com/znicholls/pandas/commit/334e69c46371c5cbad80fcbc64c192cd72a6575e --- .../testsuite/test_pandas_interface.py | 130 +++++++++++++----- 1 file changed, 93 insertions(+), 37 deletions(-) diff --git a/pint_pandas/testsuite/test_pandas_interface.py b/pint_pandas/testsuite/test_pandas_interface.py index 9d4e47ae..5352cdb1 100644 --- a/pint_pandas/testsuite/test_pandas_interface.py +++ b/pint_pandas/testsuite/test_pandas_interface.py @@ -7,6 +7,7 @@ import pint import pytest from pandas.core import ops +import pandas._testing as tm from pandas.tests.extension import base from pandas.tests.extension.conftest import ( # noqa: F401 as_array, @@ -192,23 +193,64 @@ def all_boolean_reductions(request): # ================================================================= -class TestCasting(base.BaseCastingTests): +class BasePintPandas: + # pandas assert_series_equal (which calls assert_almost_equal) gets + # confused by Pint's duck-typing. 
+ # We work around this by doing something + @classmethod + def assert_series_equal(cls, left, right, *args, **kwargs): + if left.dtype.name == "json": + assert left.dtype == right.dtype + left = pd.Series( + PintArray(left.values.astype(object)), index=left.index, name=left.name + ) + right = pd.Series( + PintArray(right.values.astype(object)), + index=right.index, + name=right.name, + ) + tm.assert_series_equal(left, right, *args, **kwargs) + + @classmethod + def assert_frame_equal(cls, left, right, *args, **kwargs): + obj_type = kwargs.get("obj", "DataFrame") + tm.assert_index_equal( + left.columns, + right.columns, + exact=kwargs.get("check_column_type", "equiv"), + check_names=kwargs.get("check_names", True), + check_exact=kwargs.get("check_exact", False), + check_categorical=kwargs.get("check_categorical", True), + obj=f"{obj_type}.columns", + ) + pints = left.dtypes.map(lambda x: str(x).startswith("pint")) + pints = pints[pints].index + + for col in pints: + cls.assert_series_equal(left[col], right[col], *args, **kwargs) + + left = left.drop(columns=pints) + right = right.drop(columns=pints) + tm.assert_frame_equal(left, right, *args, **kwargs) + + +class TestCasting(BasePintPandas, base.BaseCastingTests): pass -class TestConstructors(base.BaseConstructorsTests): +class TestConstructors(BasePintPandas, base.BaseConstructorsTests): pass -class TestDtype(base.BaseDtypeTests): +class TestDtype(BasePintPandas, base.BaseDtypeTests): pass -class TestGetitem(base.BaseGetitemTests): +class TestGetitem(BasePintPandas, base.BaseGetitemTests): pass -class TestGroupby(base.BaseGroupbyTests): +class TestGroupby(BasePintPandas, base.BaseGroupbyTests): @pytest.mark.xfail( run=True, reason="pintarrays seem not to be numeric in one version of pd" ) @@ -229,7 +271,6 @@ def test_in_numeric_groupby(self, data_for_grouping): self.assert_index_equal(result, expected) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_groupby_apply_identity(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) result = df.groupby("A").B.apply(lambda x: x.array) @@ -246,11 +287,11 @@ def test_groupby_apply_identity(self, data_for_grouping): self.assert_series_equal(result, expected) -class TestInterface(base.BaseInterfaceTests): +class TestInterface(BasePintPandas, base.BaseInterfaceTests): pass -class TestMethods(base.BaseMethodsTests): +class TestMethods(BasePintPandas, base.BaseMethodsTests): @pytest.mark.filterwarnings("ignore::pint.UnitStrippedWarning") # See test_setitem_mask_broadcast note @pytest.mark.parametrize("dropna", [True, False]) @@ -279,7 +320,6 @@ def test_unique(self, data, box, method): assert isinstance(result, type(data)) assert result[0] == duplicated[0] - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_fillna_copy_frame(self, data_missing): arr = data_missing.take([1, 1]) df = pd.DataFrame({"A": arr}) @@ -289,7 +329,6 @@ def test_fillna_copy_frame(self, data_missing): assert df.A.values is not result.A.values - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_fillna_copy_series(self, data_missing): arr = data_missing.take([1, 1]) ser = pd.Series(arr) @@ -300,7 +339,6 @@ def test_fillna_copy_series(self, data_missing): assert ser._values is not result._values assert ser._values is arr - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_searchsorted(self, data_for_sorting, as_series): # noqa: F811 b, c, a = data_for_sorting arr = 
type(data_for_sorting)._from_sequence([a, b, c]) @@ -319,13 +357,13 @@ def test_searchsorted(self, data_for_sorting, as_series): # noqa: F811 result = arr.searchsorted(arr.take([0, 2])) expected = np.array([0, 2], dtype=np.intp) - self.assert_numpy_array_equal(result, expected) + np.testing.assert_array_equal(result, expected) # sorter sorter = np.array([1, 2, 0]) assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") + @pytest.mark.xfail(run=True, reason="pandas lib.is_scalar issue") def test_where_series(self, data, na_value, as_frame): # noqa: F811 assert data[0] != data[1] cls = type(data) @@ -360,7 +398,7 @@ def test_where_series(self, data, na_value, as_frame): # noqa: F811 self.assert_equal(result, expected) -class TestArithmeticOps(base.BaseArithmeticOpsTests): +class TestArithmeticOps(BasePintPandas, base.BaseArithmeticOpsTests): def check_opname(self, s, op_name, other, exc=None): op = self.get_op_from_name(op_name) @@ -401,7 +439,11 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): s = pd.Series(data) self.check_opname(s, op_name, s.iloc[0], exc=exc) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") + @pytest.mark.xfail(run=True, reason=( + "operating with quantity of different registries " + "OR reverse operation and pint has line `if zero_or_nan(other, True)` which explodes " + "OR multiplying dimensionless and units explodes" + )) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar op_name, exc = self._get_exception(data, all_arithmetic_operators) @@ -457,7 +499,7 @@ def test_error(self, data, all_arithmetic_operators): opa(np.arange(len(s)).reshape(-1, len(s))) -class TestComparisonOps(base.BaseComparisonOpsTests): +class TestComparisonOps(BasePintPandas, base.BaseComparisonOpsTests): def _compare_other(self, s, data, op_name, other): op = self.get_op_from_name(op_name) @@ -480,19 +522,17 @@ def test_compare_array(self, data, all_compare_operators): self._compare_other(s, data, op_name, other) -class TestOpsUtil(base.BaseOpsUtil): +class TestOpsUtil(BasePintPandas, base.BaseOpsUtil): pass -class TestMissing(base.BaseMissingTests): - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") +class TestMissing(BasePintPandas, base.BaseMissingTests): def test_fillna_scalar(self, data_missing): valid = data_missing[1] result = data_missing.fillna(valid) expected = data_missing.fillna(valid) self.assert_extension_array_equal(result, expected) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_fillna_series(self, data_missing): fill_value = data_missing[1] ser = pd.Series(data_missing) @@ -527,10 +567,10 @@ def test_fillna_frame(self, data_missing): "B": [1, 2], } ) - self.assert_series_equal(result, expected) + self.assert_frame_equal(result, expected) -class TestNumericReduce(base.BaseNumericReduceTests): +class TestNumericReduce(BasePintPandas, base.BaseNumericReduceTests): def check_reduce(self, s, op_name, skipna): result = getattr(s, op_name)(skipna=skipna) expected_m = getattr(pd.Series(s.values.quantity._magnitude), op_name)( @@ -541,7 +581,7 @@ def check_reduce(self, s, op_name, skipna): assert result == expected -class TestBooleanReduce(base.BaseBooleanReduceTests): +class TestBooleanReduce(BasePintPandas, base.BaseBooleanReduceTests): def check_reduce(self, s, op_name, skipna): result = getattr(s, op_name)(skipna=skipna) expected = getattr(pd.Series(s.values.quantity._magnitude), op_name)( @@ 
-550,8 +590,27 @@ def check_reduce(self, s, op_name, skipna): assert result == expected -class TestReshaping(base.BaseReshapingTests): - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") +class TestReshaping(BasePintPandas, base.BaseReshapingTests): + @pytest.mark.parametrize( + "index", + [ + # Two levels, uniform. + pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]), + # non-uniform + pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]), + # three levels, non-uniform + pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]), + pd.MultiIndex.from_tuples( + [ + ("A", "a", 1), + ("A", "b", 0), + ("A", "a", 0), + ("B", "a", 0), + ("B", "c", 1), + ] + ), + ], + ) @pytest.mark.parametrize("obj", ["series", "frame"]) def test_unstack(self, data, index, obj): data = data[: len(index)] @@ -581,20 +640,21 @@ def test_unstack(self, data, index, obj): alt = df.unstack(level=level).droplevel(0, axis=1) self.assert_frame_equal(result, alt) - expected = ser.astype(object).unstack(level=level) - result = result.astype(object) + expected = ser.unstack(level=level, fill_value=data.dtype.na_value) + # convert to common pint datatype for comparisons rather than + # object which causes panda's assert_almost_equal to explode + result = result.astype(data.dtype) self.assert_frame_equal(result, expected) -class TestSetitem(base.BaseSetitemTests): +class TestSetitem(BasePintPandas, base.BaseSetitemTests): @pytest.mark.parametrize("setter", ["loc", None]) @pytest.mark.filterwarnings("ignore::pint.UnitStrippedWarning") # Pandas performs a hasattr(__array__), which triggers the warning # Debugging it does not pass through a PintArray, so # I think this needs changing in pint quantity # eg s[[True]*len(s)]=Q_(1,"m") - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_setitem_mask_broadcast(self, data, setter): ser = pd.Series(data) mask = np.zeros(len(data), dtype=bool) @@ -609,7 +669,6 @@ def test_setitem_mask_broadcast(self, data, setter): assert ser[0] == data[10] assert ser[1] == data[10] - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_setitem_sequence_broadcasts(self, data, box_in_series): if box_in_series: data = pd.Series(data) @@ -617,7 +676,6 @@ def test_setitem_sequence_broadcasts(self, data, box_in_series): assert data[0] == data[2] assert data[1] == data[2] - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") @pytest.mark.parametrize( "idx", [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], @@ -634,7 +692,6 @@ def test_setitem_integer_array(self, data, idx, box_in_series): arr[idx] = arr[0] self.assert_equal(arr, expected) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_setitem_slice(self, data, box_in_series): arr = data[:5].copy() expected = data.take([0, 0, 0, 3, 4]) @@ -645,7 +702,6 @@ def test_setitem_slice(self, data, box_in_series): arr[:3] = data[0] self.assert_equal(arr, expected) - @pytest.mark.xfail(run=True, reason="__iter__ / __len__ issue") def test_setitem_loc_iloc_slice(self, data): arr = data[:5].copy() s = pd.Series(arr, index=["a", "b", "c", "d", "e"]) @@ -660,7 +716,7 @@ def test_setitem_loc_iloc_slice(self, data): self.assert_equal(result, expected) -class TestOffsetUnits(object): +class TestOffsetUnits: def test_offset_concat(): a = pd.Series(PintArray(range(5), ureg.Unit("degC"))) b = pd.Series(PintArray(range(6), ureg.Unit("degC"))) @@ -672,7 +728,7 @@ def test_offset_concat(): # but this isn't a discussion we've 
had yet -class TestUserInterface(object): +class TestUserInterface: def test_get_underlying_data(self, data): ser = pd.Series(data) # this first test creates an array of bool (which is desired, eg for indexing) @@ -736,7 +792,7 @@ def test_df_operations(self): df_.pint.to_base_units().pint.dequantify() -class TestDataFrameAccessor(object): +class TestDataFrameAccessor: def test_index_maintained(self): test_csv = join(dirname(__file__), "pandas_test.csv") @@ -784,7 +840,7 @@ def get_pint_value(in_str): pd.testing.assert_frame_equal(result, expected) -class TestSeriesAccessors(object): +class TestSeriesAccessors: @pytest.mark.parametrize( "attr", [ From e849096edd898b3d027f92e946f1912e49ba0584 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 16 Feb 2021 13:27:24 +1100 Subject: [PATCH 4/5] Remove irrelevant comments --- pint_pandas/pint_array.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 9a261f5f..ce7b574c 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -262,13 +262,6 @@ def __setitem__(self, key, value): elif is_list_like(value) and isinstance(value[0], _Quantity): value = [item.to(self.units).magnitude for item in value] - # _is_scalar = is_scalar(value) - # if _is_scalar: - # value = [value] - # # why the same if clause again? - # if _is_scalar: - # value = value[0] - key = convert_indexing_key(key) try: @@ -628,7 +621,6 @@ def convert_values(param): return res op_name = f"__{op}__" - # op_name = f"__{op.__name__.strip('_')}__" return set_function_name(_binop, op_name, cls) From a4eef470c97cd8e411aa71742633cc7c4e71e4e9 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Wed, 17 Feb 2021 16:31:18 +1100 Subject: [PATCH 5/5] Remove unused code --- pint_pandas/testsuite/test_pandas_interface.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pint_pandas/testsuite/test_pandas_interface.py b/pint_pandas/testsuite/test_pandas_interface.py index 5352cdb1..b2710e68 100644 --- a/pint_pandas/testsuite/test_pandas_interface.py +++ b/pint_pandas/testsuite/test_pandas_interface.py @@ -199,16 +199,7 @@ class BasePintPandas: # We work around this by doing something @classmethod def assert_series_equal(cls, left, right, *args, **kwargs): - if left.dtype.name == "json": - assert left.dtype == right.dtype - left = pd.Series( - PintArray(left.values.astype(object)), index=left.index, name=left.name - ) - right = pd.Series( - PintArray(right.values.astype(object)), - index=right.index, - name=right.name, - ) + # casting etc. can be done here if helpful tm.assert_series_equal(left, right, *args, **kwargs) @classmethod
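
Not part of the patch series itself: a minimal standalone sketch of the idea behind the BasePintPandas.assert_frame_equal helper added in PATCH 3/5 - compare pint-backed columns one series at a time and hand the remaining columns to pandas' own frame comparison, since pandas' assert_almost_equal gets confused by Pint's duck-typing. The column names and the "pint[m]" dtype string below are illustrative assumptions, not taken from the patches.

import pandas as pd
import pandas._testing as tm
import pint_pandas  # noqa: F401  registers the "pint[...]" extension dtype with pandas

# two frames sharing a pint-backed column and a plain float column
left = pd.DataFrame(
    {
        "length": pd.Series([1.0, 2.0, 3.0], dtype="pint[m]"),
        "plain": [1.0, 2.0, 3.0],
    }
)
right = left.copy()

# find the pint-backed columns, mirroring the dtype check used in the patch
pints = left.dtypes.map(lambda x: str(x).startswith("pint"))
pint_cols = pints[pints].index

# compare pint-backed columns series by series ...
for col in pint_cols:
    tm.assert_series_equal(left[col], right[col])

# ... and let pandas compare whatever is left as an ordinary frame
tm.assert_frame_equal(left.drop(columns=pint_cols), right.drop(columns=pint_cols))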