@@ -292,13 +292,60 @@ def test_contains_nan(any_string_dtype):
292292
293293def test_contains_compiled_regex (any_string_dtype ):
294294 # GH#61942
295- ser = Series (["foo" , "bar" , "baz" ], dtype = any_string_dtype )
295+ expected_dtype = (
296+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
297+ )
298+
299+ ser = Series (["foo" , "bar" , "Baz" ], dtype = any_string_dtype )
300+
296301 pat = re .compile ("ba." )
297302 result = ser .str .contains (pat )
303+ expected = Series ([False , True , False ], dtype = expected_dtype )
304+ tm .assert_series_equal (result , expected )
305+
306+ # TODO this currently works for pyarrow-backed dtypes but raises for python
307+ if any_string_dtype == "string" and any_string_dtype .storage == "pyarrow" :
308+ result = ser .str .contains (pat , case = False )
309+ expected = Series ([False , True , True ], dtype = expected_dtype )
310+ tm .assert_series_equal (result , expected )
311+ else :
312+ with pytest .raises (
313+ ValueError , match = "cannot process flags argument with a compiled pattern"
314+ ):
315+ ser .str .contains (pat , case = False )
316+
317+ pat = re .compile ("ba." , flags = re .IGNORECASE )
318+ result = ser .str .contains (pat )
319+ expected = Series ([False , True , True ], dtype = expected_dtype )
320+ tm .assert_series_equal (result , expected )
298321
322+ # TODO should this be supported?
323+ with pytest .raises (
324+ ValueError , match = "cannot process flags argument with a compiled pattern"
325+ ):
326+ ser .str .contains (pat , flags = re .IGNORECASE )
327+
328+
329+ def test_contains_compiled_regex_flags (any_string_dtype ):
330+ # ensure other (than ignorecase) flags are respected
299331 expected_dtype = (
300332 np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
301333 )
334+
335+ ser = Series (["foobar" , "foo\n bar" , "Baz" ], dtype = any_string_dtype )
336+
337+ pat = re .compile ("^ba" )
338+ result = ser .str .contains (pat )
339+ expected = Series ([False , False , False ], dtype = expected_dtype )
340+ tm .assert_series_equal (result , expected )
341+
342+ pat = re .compile ("^ba" , flags = re .MULTILINE )
343+ result = ser .str .contains (pat )
344+ expected = Series ([False , True , False ], dtype = expected_dtype )
345+ tm .assert_series_equal (result , expected )
346+
347+ pat = re .compile ("^ba" , flags = re .MULTILINE | re .IGNORECASE )
348+ result = ser .str .contains (pat )
302349 expected = Series ([False , True , True ], dtype = expected_dtype )
303350 tm .assert_series_equal (result , expected )
304351
@@ -837,14 +884,36 @@ def test_match_case_kwarg(any_string_dtype):
837884
838885def test_match_compiled_regex (any_string_dtype ):
839886 # GH#61952
840- values = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
841- result = values .str .match (re .compile (r"ab" ), case = False )
842887 expected_dtype = (
843888 np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
844889 )
890+
891+ values = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
892+
893+ result = values .str .match (re .compile ("ab" ))
894+ expected = Series ([True , False , True , False ], dtype = expected_dtype )
895+ tm .assert_series_equal (result , expected )
896+
897+ # TODO this currently works for pyarrow-backed dtypes but raises for python
898+ if any_string_dtype == "string" and any_string_dtype .storage == "pyarrow" :
899+ result = values .str .match (re .compile ("ab" ), case = False )
900+ expected = Series ([True , True , True , True ], dtype = expected_dtype )
901+ tm .assert_series_equal (result , expected )
902+ else :
903+ with pytest .raises (
904+ ValueError , match = "cannot process flags argument with a compiled pattern"
905+ ):
906+ values .str .match (re .compile ("ab" ), case = False )
907+
908+ result = values .str .match (re .compile ("ab" , flags = re .IGNORECASE ))
845909 expected = Series ([True , True , True , True ], dtype = expected_dtype )
846910 tm .assert_series_equal (result , expected )
847911
912+ with pytest .raises (
913+ ValueError , match = "cannot process flags argument with a compiled pattern"
914+ ):
915+ values .str .match (re .compile ("ab" ), flags = re .IGNORECASE )
916+
848917
849918# --------------------------------------------------------------------------------------
850919# str.fullmatch
@@ -917,14 +986,36 @@ def test_fullmatch_case_kwarg(any_string_dtype):
917986
918987def test_fullmatch_compiled_regex (any_string_dtype ):
919988 # GH#61952
920- values = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
921- result = values .str .fullmatch (re .compile (r"ab" ), case = False )
922989 expected_dtype = (
923990 np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
924991 )
992+
993+ values = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
994+
995+ result = values .str .fullmatch (re .compile ("ab" ))
996+ expected = Series ([True , False , False , False ], dtype = expected_dtype )
997+ tm .assert_series_equal (result , expected )
998+
999+ # TODO this currently works for pyarrow-backed dtypes but raises for python
1000+ if any_string_dtype == "string" and any_string_dtype .storage == "pyarrow" :
1001+ result = values .str .fullmatch (re .compile ("ab" ), case = False )
1002+ expected = Series ([True , True , False , False ], dtype = expected_dtype )
1003+ tm .assert_series_equal (result , expected )
1004+ else :
1005+ with pytest .raises (
1006+ ValueError , match = "cannot process flags argument with a compiled pattern"
1007+ ):
1008+ values .str .fullmatch (re .compile ("ab" ), case = False )
1009+
1010+ result = values .str .fullmatch (re .compile ("ab" , flags = re .IGNORECASE ))
9251011 expected = Series ([True , True , False , False ], dtype = expected_dtype )
9261012 tm .assert_series_equal (result , expected )
9271013
1014+ with pytest .raises (
1015+ ValueError , match = "cannot process flags argument with a compiled pattern"
1016+ ):
1017+ values .str .fullmatch (re .compile ("ab" ), flags = re .IGNORECASE )
1018+
9281019
9291020# --------------------------------------------------------------------------------------
9301021# str.findall
0 commit comments