Skip to content

Commit 1b9c904

Browse files
committed
POC: pd.Scalar
1 parent 53cb639 commit 1b9c904

File tree

9 files changed

+88
-13
lines changed

9 files changed

+88
-13
lines changed

pandas/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,7 @@
134134
from pandas import api, arrays, errors, io, plotting, tseries
135135
from pandas import testing
136136
from pandas.util._print_versions import show_versions
137+
from pandas._libs.lib import Scalar
137138

138139
from pandas.io.api import (
139140
# excel
@@ -268,6 +269,7 @@
268269
"PeriodDtype",
269270
"PeriodIndex",
270271
"RangeIndex",
272+
"Scalar",
271273
"Series",
272274
"SparseDtype",
273275
"StringDtype",

pandas/_libs/lib.pyi

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,9 @@ class _NoDefault(Enum):
3434
no_default: Final = _NoDefault.no_default
3535
NoDefault: TypeAlias = Literal[_NoDefault.no_default]
3636

37+
class Scalar:
38+
item: object
39+
3740
i8max: int
3841
u8max: int
3942

pandas/_libs/lib.pyx

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -212,15 +212,18 @@ def is_scalar(val: object) -> bool:
212212
"""
213213

214214
# Start with C-optimized checks
215-
if (cnp.PyArray_IsAnyScalar(val)
216-
# PyArray_IsAnyScalar is always False for bytearrays on Py3
217-
or PyDate_Check(val)
218-
or PyDelta_Check(val)
219-
or PyTime_Check(val)
220-
# We differ from numpy, which claims that None is not scalar;
221-
# see np.isscalar
222-
or val is C_NA
223-
or val is None):
215+
if (
216+
cnp.PyArray_IsAnyScalar(val)
217+
# PyArray_IsAnyScalar is always False for bytearrays on Py3
218+
or PyDate_Check(val)
219+
or PyDelta_Check(val)
220+
or PyTime_Check(val)
221+
# We differ from numpy, which claims that None is not scalar;
222+
# see np.isscalar
223+
or val is C_NA
224+
or val is None
225+
or type(val) is Scalar
226+
):
224227
return True
225228

226229
# Next use C-optimized checks to exclude common non-scalars before falling
@@ -3297,3 +3300,15 @@ def is_np_dtype(object dtype, str kinds=None) -> bool:
32973300
if kinds is None:
32983301
return True
32993302
return dtype.kind in kinds
3303+
3304+
3305+
cdef class Scalar:
3306+
"""
3307+
Class for wrapping list-like objects to indicate they should be treated
3308+
as scalars for e.g. arithmetic operations.
3309+
"""
3310+
cdef:
3311+
readonly object item
3312+
3313+
def __cinit__(self, obj):
3314+
self.item = obj

pandas/core/arrays/categorical.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,15 @@ def _cat_compare_op(op):
126126

127127
@unpack_zerodim_and_defer(opname)
128128
def func(self, other):
129+
if is_list_like(other) and not isinstance(other, (np.ndarray, ExtensionArray)):
130+
warnings.warn(
131+
"Comparison of Categorical to list-like objects intended "
132+
"to be treated as scalars is deprecated. Wrap the scalar in "
133+
"pd.Scalar(item) before comparing instead.",
134+
Pandas4Warning,
135+
stacklevel=find_stack_level(),
136+
)
137+
129138
hashable = is_hashable(other)
130139
if is_list_like(other) and len(other) != len(self) and not hashable:
131140
# in hashable case we may have a tuple that is itself a category
@@ -158,7 +167,10 @@ def func(self, other):
158167
ret[mask] = fill_value
159168
return ret
160169

161-
if hashable:
170+
if hashable or isinstance(other, lib.Scalar):
171+
if isinstance(other, lib.Scalar):
172+
other = other.item
173+
162174
if other in self.categories:
163175
i = self._unbox_scalar(other)
164176
ret = op(self._codes, i)

pandas/tests/api/test_api.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,7 @@ class TestPDApi(Base):
9696
"Float32Dtype",
9797
"Float64Dtype",
9898
"NamedAgg",
99+
"Scalar",
99100
]
100101

101102
# these are already deprecated; awaiting removal

pandas/tests/arithmetic/test_categorical.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,10 @@
11
import numpy as np
22

3+
from pandas.errors import Pandas4Warning
4+
35
from pandas import (
46
Categorical,
7+
Scalar,
58
Series,
69
)
710
import pandas._testing as tm
@@ -18,8 +21,14 @@ def test_categorical_tuple_equality(self):
1821
# GH 18050
1922
ser = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)])
2023
expected = Series([True, False, True, False, False])
24+
2125
result = ser == (0, 0)
2226
tm.assert_series_equal(result, expected)
2327

24-
result = ser.astype("category") == (0, 0)
28+
msg = "Comparison of Categorical to list-like objects"
29+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
30+
result = ser.astype("category") == (0, 0)
31+
tm.assert_series_equal(result, expected)
32+
33+
result = ser.astype("category") == Scalar((0, 0))
2534
tm.assert_series_equal(result, expected)

pandas/tests/arrays/categorical/test_operators.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.errors import Pandas4Warning
5+
46
import pandas as pd
57
from pandas import (
68
Categorical,
@@ -202,11 +204,21 @@ def test_comparison_with_tuple(self):
202204
expected = np.array([True, False, False, False], dtype=bool)
203205
tm.assert_numpy_array_equal(result, expected)
204206

205-
result = cat == (0, 1)
207+
msg = "Comparison of Categorical to list-like objects"
208+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
209+
result = cat == (0, 1)
206210
expected = np.array([False, True, False, True], dtype=bool)
207211
tm.assert_numpy_array_equal(result, expected)
208212

209-
result = cat != (0, 1)
213+
result = cat == pd.Scalar((0, 1))
214+
tm.assert_numpy_array_equal(result, expected)
215+
216+
msg = "Comparison of Categorical to list-like objects"
217+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
218+
result = cat != (0, 1)
219+
tm.assert_numpy_array_equal(result, ~expected)
220+
221+
result = cat != pd.Scalar((0, 1))
210222
tm.assert_numpy_array_equal(result, ~expected)
211223

212224
@pytest.mark.filterwarnings("ignore::RuntimeWarning")

pandas/tests/dtypes/test_inference.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -260,6 +260,14 @@ def test_is_list_like_native_container_types():
260260
assert not inference.is_list_like(tuple[str])
261261

262262

263+
def test_scalar_list_not_listlike():
264+
item = lib.Scalar([0, 1])
265+
assert not inference.is_list_like(item)
266+
267+
tup = lib.Scalar((0, 1))
268+
assert not inference.is_list_like(tup)
269+
270+
263271
def test_is_sequence():
264272
is_seq = inference.is_sequence
265273
assert is_seq((1, 2))
@@ -1892,6 +1900,10 @@ def test_is_timedelta(self):
18921900

18931901

18941902
class TestIsScalar:
1903+
def test_is_scalar_scalar_class(self):
1904+
assert is_scalar(lib.Scalar([0, 1]))
1905+
assert is_scalar(lib.Scalar((0, 1)))
1906+
18951907
def test_is_scalar_builtin_scalars(self):
18961908
assert is_scalar(None)
18971909
assert is_scalar(True)

pandas/tests/libs/test_lib.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,15 @@
1414
import pandas._testing as tm
1515

1616

17+
class TestScalar:
18+
def test_scalar_immutable(self):
19+
scalar = lib.Scalar("foo")
20+
21+
msg = "attribute 'item' of 'pandas._libs.lib.Scalar' objects is not writable"
22+
with pytest.raises(AttributeError, match=msg):
23+
scalar.item = 2
24+
25+
1726
class TestMisc:
1827
def test_max_len_string_array(self):
1928
arr = a = np.array(["foo", "b", np.nan], dtype="object")

0 commit comments

Comments
 (0)