-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
BUG: DataFrame arithmetic operators don't work with Series using fill_value #61828
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 8 commits
99ae672
4e77fb7
eb12b34
7e23b65
4617108
bca56fe
7273396
4493e08
406cd15
ad9614b
5a64e5c
73d168b
13ed0d6
3d3a2a6
76a122e
1d3260e
32360a8
30d4a07
771a19c
a321daf
c79c210
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -626,11 +626,43 @@ def test_arith_flex_frame_corner(self, float_frame): | |
| expected = float_frame.sort_index() * np.nan | ||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
| with pytest.raises(NotImplementedError, match="fill_value"): | ||
| float_frame.add(float_frame.iloc[0], fill_value=3) | ||
| @pytest.mark.parametrize("axis", [0, 1]) | ||
| def test_arith_flex_frame_fill_value_series(self, float_frame, axis): | ||
| rng = np.random.default_rng(60) | ||
| mask = rng.random(float_frame.shape) < 0.2 | ||
| left = float_frame.mask(mask) | ||
| right = left.iloc[0] | ||
|
|
||
| result = left.add(right, axis=axis, fill_value=3) | ||
|
|
||
| if axis == 0: # axis = index, vertical | ||
| pad_num = abs(result.shape[0] - len(right)) | ||
| mult_num = result.shape[1] | ||
| right_pad = np.pad( | ||
| right, (0, pad_num), mode="constant", constant_values=(np.nan) | ||
| ) | ||
| right_df = DataFrame( | ||
| [right_pad] * mult_num, columns=result.index, index=result.columns | ||
| ).T | ||
|
|
||
| left = left.reindex_like(result) | ||
|
|
||
| else: # axis = columns, horizontal | ||
| pad_num = abs(result.shape[1] - len(right)) | ||
| mult_num = result.shape[0] | ||
| right_pad = np.pad( | ||
| right, (0, pad_num), mode="constant", constant_values=(np.nan) | ||
| ) | ||
| right_df = DataFrame( | ||
| [right_pad] * mult_num, index=result.index, columns=result.columns | ||
| ) | ||
|
|
||
| with pytest.raises(NotImplementedError, match="fill_value"): | ||
| float_frame.add(float_frame.iloc[0], axis="index", fill_value=3) | ||
| left_filled = left.fillna(3) | ||
| right_filled = right_df.fillna(3) | ||
| expected = right_filled + left_filled | ||
| expected = expected.mask(expected == 6, pd.NA) | ||
|
|
||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
| @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"]) | ||
| def test_arith_flex_series_ops(self, simple_frame, op): | ||
|
|
@@ -672,11 +704,21 @@ def test_arith_flex_zero_len_raises(self): | |
| df_len0 = DataFrame(columns=["A", "B"]) | ||
| df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) | ||
|
|
||
| with pytest.raises(NotImplementedError, match="fill_value"): | ||
| msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'" | ||
| with pytest.raises(TypeError, match=msg): | ||
| df.add(ser_len0, fill_value="E") | ||
|
|
||
| with pytest.raises(NotImplementedError, match="fill_value"): | ||
| df_len0.sub(df["A"], axis=None, fill_value=3) | ||
| result = df_len0.sub(df, axis=None, fill_value=3) | ||
| expected = DataFrame([[2, 1], [0, -1]], columns=["A", "B"]) | ||
| tm.assert_frame_equal(result, expected, check_dtype=False) | ||
|
|
||
| result = df_len0.sub(df["A"], axis=0, fill_value=3) | ||
| expected = DataFrame([[2, 2], [0, 0]], columns=["A", "B"]) | ||
| tm.assert_frame_equal(result, expected, check_dtype=False) | ||
|
|
||
| result = df_len0.sub(df["A"], axis=1, fill_value=3) | ||
| expected = DataFrame([], columns=["A", "B", 0, 1]) | ||
| tm.assert_frame_equal(result, expected, check_dtype=False) | ||
|
|
||
| def test_flex_add_scalar_fill_value(self): | ||
| # GH#12723 | ||
|
|
@@ -2192,3 +2234,54 @@ def test_mixed_col_index_dtype(string_dtype_no_object): | |
| expected.columns = expected.columns.astype(string_dtype_no_object) | ||
|
|
||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
|
|
||
| dt_params = [ | ||
| (tm.ALL_INT_NUMPY_DTYPES[0], 10), | ||
| (tm.ALL_INT_EA_DTYPES[0], 10), | ||
| (tm.FLOAT_NUMPY_DTYPES[0], 4.9), | ||
| (tm.FLOAT_EA_DTYPES[0], 4.9), | ||
| ] | ||
|
|
||
| axes = [0, 1] | ||
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| "data_type,fill_val, axis", | ||
| [(dt, val, axis) for axis in axes for dt, val in dt_params], | ||
| ) | ||
| def test_df_mul_array_fill_value(data_type, fill_val, axis): | ||
|
||
| # GH 61581 | ||
| base_data = np.arange(12).reshape(4, 3) | ||
| df = DataFrame(base_data) | ||
| mult_list = [np.nan, 1, 5, np.nan] | ||
| mult_list = mult_list[: df.shape[axis]] | ||
|
|
||
| if data_type in tm.ALL_INT_NUMPY_DTYPES: | ||
| # Numpy int type cannot represent NaN | ||
| mult_np = np.asarray(mult_list) | ||
| mult_list = np.nan_to_num(mult_np, nan=fill_val) | ||
|
|
||
| mult_data = pd.array(mult_list, dtype=data_type) | ||
|
|
||
| for i in range(df.shape[0]): | ||
| try: | ||
| df.iat[i, i] = np.nan | ||
| df.iat[i + 2, i] = pd.NA | ||
| except IndexError: | ||
| pass | ||
|
|
||
| if axis == 0: | ||
| mult_mat = np.broadcast_to(mult_data.reshape(-1, 1), df.shape) | ||
| mask = np.isnan(mult_mat) | ||
| else: | ||
| mult_mat = np.broadcast_to(mult_data.reshape(1, -1), df.shape) | ||
| mask = np.isnan(mult_mat) | ||
| mask = df.isna().values & mask | ||
|
|
||
| df_result = df.mul(mult_data, axis=axis, fill_value=fill_val) | ||
| df_expected = (df.fillna(fill_val).mul(mult_data.fillna(fill_val), axis=axis)).mask( | ||
| mask, np.nan | ||
| ) | ||
|
|
||
| tm.assert_frame_equal(df_result, df_expected) | ||
Uh oh!
There was an error while loading. Please reload this page.