77import numpy as np
88import pytest
99
10+ from pandas .compat import pa_version_under21p0
11+
1012from pandas import (
1113 NA ,
1214 DataFrame ,
1315 Index ,
1416 MultiIndex ,
1517 Series ,
18+ StringDtype ,
1619)
1720import pandas ._testing as tm
1821from pandas .core .strings .accessor import StringMethods
@@ -240,8 +243,9 @@ def test_ismethods(method, expected, any_string_dtype):
240243@pytest .mark .parametrize (
241244 "method, expected" ,
242245 [
243- ("isnumeric" , [False , True , True , False , True , True , False ]),
244- ("isdecimal" , [False , True , False , False , False , True , False ]),
246+ ("isnumeric" , [False , True , True , True , False , True , True , False ]),
247+ ("isdecimal" , [False , True , False , False , False , False , True , False ]),
248+ ("isdigit" , [False , True , True , False , False , False , True , False ]),
245249 ],
246250)
247251def test_isnumeric_unicode (method , expected , any_string_dtype ):
@@ -250,18 +254,35 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
250254 # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
251255 # 0xFF13: 3 Em 3 # noqa: RUF003
252256 ser = Series (
253- ["A" , "3" , "¼" , "★" , "፸" , "3" , "four" ], dtype = any_string_dtype # noqa: RUF001
257+ ["A" , "3" , "³" , "¼" , "★" , "፸" , "3" , "four" ], # noqa: RUF001
258+ dtype = any_string_dtype ,
254259 )
255260 expected_dtype = (
256261 "bool" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
257262 )
258263 expected = Series (expected , dtype = expected_dtype )
264+ if (
265+ method == "isdigit"
266+ and isinstance (ser .dtype , StringDtype )
267+ and ser .dtype .storage == "pyarrow"
268+ and not pa_version_under21p0
269+ ):
270+ # known difference between Python and pyarrow Unicode handling:
271+ # pyarrow 21+ treats ¼ and ፸ (positions 3 and 5) as digits, while Python does not
272+ expected .iloc [3 ] = True
273+ expected .iloc [5 ] = True
274+
259275 result = getattr (ser .str , method )()
260276 tm .assert_series_equal (result , expected )
261277
262278 # compare with standard library
263- expected = [getattr (item , method )() for item in ser ]
264- assert list (result ) == expected
279+ # (only for non-pyarrow storage given the above differences)
280+ if any_string_dtype == "object" or (
281+ isinstance (any_string_dtype , StringDtype )
282+ and any_string_dtype .storage == "python"
283+ ):
284+ expected = [getattr (item , method )() for item in ser ]
285+ assert list (result ) == expected
265286
266287
267288@pytest .mark .filterwarnings ("ignore:Downcasting object dtype arrays:FutureWarning" )
0 commit comments