From dbc83b52841f1a17572b6942664b1e35482401e3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 22 Sep 2025 09:06:28 +0200 Subject: [PATCH 1/3] add extra test --- pandas/tests/strings/test_find_replace.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index ec9ddc916a856..f7d05593c250b 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -973,6 +973,27 @@ def test_match_compiled_regex(any_string_dtype): values.str.match(re.compile("ab"), flags=re.IGNORECASE) +@pytest.mark.parametrize( + "pat, case, exp", + [ + ["ab", False, [True]], + ["Ab", True, [False]], + ["bc", True, [False]], + ["a[a-z]{1}", False, [True]], + ["A[a-z]{1}", True, [False]], + ], +) +def test_str_match_extra_cases(any_string_dtype, pat, case, exp): + ser = Series(["abc"], dtype=any_string_dtype) + result = ser.str.match(pat, case=case) + + expected_dtype = ( + np.bool_ if is_object_or_nan_string_dtype(any_string_dtype) else "boolean" + ) + expected = Series(exp, dtype=expected_dtype) + tm.assert_series_equal(result, expected) + + # -------------------------------------------------------------------------------------- # str.fullmatch # -------------------------------------------------------------------------------------- @@ -1108,7 +1129,6 @@ def test_str_fullmatch_extra_cases(any_string_dtype, pat, case, na, exp): expected_dtype = ( "object" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean" ) - expected = Series([True, False, np.nan, False], dtype=expected_dtype) expected = Series(exp, dtype=expected_dtype) tm.assert_series_equal(result, expected) From f9da39d43709dee9b2d60dcae4e4241c1f16daad Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 22 Sep 2025 09:16:38 +0200 Subject: [PATCH 2/3] add case that fails --- pandas/tests/strings/test_find_replace.py | 15 +++++++++------ 
1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index f7d05593c250b..a53b8475aa379 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -976,15 +976,18 @@ def test_match_compiled_regex(any_string_dtype): @pytest.mark.parametrize( "pat, case, exp", [ - ["ab", False, [True]], - ["Ab", True, [False]], - ["bc", True, [False]], - ["a[a-z]{1}", False, [True]], - ["A[a-z]{1}", True, [False]], + ["ab", False, [True, False]], + ["Ab", True, [False, False]], + ["bc", True, [False, False]], + ["a[a-z]{1}", False, [True, False]], + ["A[a-z]{1}", True, [False, False]], + # https://github.com/pandas-dev/pandas/issues/61072 + ["(bc)|(ab)", True, [True, False]], + ["((bc)|(ab))", True, [True, False]], ], ) def test_str_match_extra_cases(any_string_dtype, pat, case, exp): - ser = Series(["abc"], dtype=any_string_dtype) + ser = Series(["abc", "Xab"], dtype=any_string_dtype) result = ser.str.match(pat, case=case) expected_dtype = ( From 4ecb551aef2e14fee7544bd80903ea9d79b09e72 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 22 Sep 2025 09:19:05 +0200 Subject: [PATCH 3/3] fix match --- doc/source/whatsnew/v2.3.3.rst | 2 +- pandas/core/arrays/_arrow_string_mixins.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst index 6ef1c3aeea51d..a4dd096236d26 100644 --- a/doc/source/whatsnew/v2.3.3.rst +++ b/doc/source/whatsnew/v2.3.3.rst @@ -35,7 +35,7 @@ Bug fixes - Fix bug in :meth:`Series.str.replace` using named capture groups (e.g., ``\g<name>``) with the Arrow-backed dtype would raise an error (:issue:`57636`) - Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch`` with a compiled regex and custom flags (:issue:`62240`) -- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the
Arrow-backed string dtype (:issue:`61072`) +- Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`) Improvements and fixes for Copy-on-Write diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py index d5f44226d5c9f..b2c1e07b23a1e 100644 --- a/pandas/core/arrays/_arrow_string_mixins.py +++ b/pandas/core/arrays/_arrow_string_mixins.py @@ -316,7 +316,7 @@ def _str_match( na: Scalar | lib.NoDefault = lib.no_default, ): if not pat.startswith("^"): - pat = f"^{pat}" + pat = f"^({pat})" return self._str_contains(pat, case, flags, na, regex=True) def _str_fullmatch(