44import numpy as np
55import pytest
66
7- from pandas ._config import using_string_dtype
8-
9- from pandas .compat import HAS_PYARROW
10- from pandas .errors import PerformanceWarning
117import pandas .util ._test_decorators as td
128
139import pandas as pd
2521# --------------------------------------------------------------------------------------
2622
2723
28- def using_pyarrow (dtype ):
29- return dtype == "string" and dtype .storage == "pyarrow"
30-
31-
3224def test_contains (any_string_dtype ):
3325 values = np .array (
3426 ["foo" , np .nan , "fooommm__foo" , "mmm_" , "foommm[_]+bar" ], dtype = np .object_
@@ -281,10 +273,13 @@ def test_contains_nan(any_string_dtype):
281273# --------------------------------------------------------------------------------------
282274
283275
284- @pytest .mark .xfail (
285- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)" , strict = False
286- )
287- def test_startswith_endswith_validate_na (any_string_dtype ):
276+ def test_startswith_endswith_validate_na (request , any_string_dtype ):
277+ if (
278+ any_string_dtype == "string"
279+ and any_string_dtype .na_value is np .nan
280+ and any_string_dtype .storage == "python"
281+ ):
282+ request .applymarker (pytest .mark .xfail (reason = "TODO(infer_string)" ))
288283 # GH#59615
289284 ser = Series (
290285 ["om" , np .nan , "foo_nom" , "nom" , "bar_foo" , np .nan , "foo" ],
@@ -462,8 +457,7 @@ def test_replace_mixed_object():
462457def test_replace_unicode (any_string_dtype ):
463458 ser = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
464459 expected = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
465- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
466- result = ser .str .replace (r"(?<=\w),(?=\w)" , ", " , flags = re .UNICODE , regex = True )
460+ result = ser .str .replace (r"(?<=\w),(?=\w)" , ", " , flags = re .UNICODE , regex = True )
467461 tm .assert_series_equal (result , expected )
468462
469463
@@ -483,8 +477,7 @@ def test_replace_callable(any_string_dtype):
483477
484478 # test with callable
485479 repl = lambda m : m .group (0 ).swapcase ()
486- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
487- result = ser .str .replace ("[a-z][A-Z]{2}" , repl , n = 2 , regex = True )
480+ result = ser .str .replace ("[a-z][A-Z]{2}" , repl , n = 2 , regex = True )
488481 expected = Series (["foObaD__baRbaD" , np .nan ], dtype = any_string_dtype )
489482 tm .assert_series_equal (result , expected )
490483
@@ -502,19 +495,15 @@ def test_replace_callable_raises(any_string_dtype, repl):
502495 r"(?(3)required )positional arguments?"
503496 )
504497 with pytest .raises (TypeError , match = msg ):
505- with tm .maybe_produces_warning (
506- PerformanceWarning , using_pyarrow (any_string_dtype )
507- ):
508- values .str .replace ("a" , repl , regex = True )
498+ values .str .replace ("a" , repl , regex = True )
509499
510500
511501def test_replace_callable_named_groups (any_string_dtype ):
512502 # test regex named groups
513503 ser = Series (["Foo Bar Baz" , np .nan ], dtype = any_string_dtype )
514504 pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
515505 repl = lambda m : m .group ("middle" ).swapcase ()
516- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
517- result = ser .str .replace (pat , repl , regex = True )
506+ result = ser .str .replace (pat , repl , regex = True )
518507 expected = Series (["bAR" , np .nan ], dtype = any_string_dtype )
519508 tm .assert_series_equal (result , expected )
520509
@@ -525,13 +514,11 @@ def test_replace_compiled_regex(any_string_dtype):
525514
526515 # test with compiled regex
527516 pat = re .compile (r"BAD_*" )
528- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
529- result = ser .str .replace (pat , "" , regex = True )
517+ result = ser .str .replace (pat , "" , regex = True )
530518 expected = Series (["foobar" , np .nan ], dtype = any_string_dtype )
531519 tm .assert_series_equal (result , expected )
532520
533- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
534- result = ser .str .replace (pat , "" , n = 1 , regex = True )
521+ result = ser .str .replace (pat , "" , n = 1 , regex = True )
535522 expected = Series (["foobarBAD" , np .nan ], dtype = any_string_dtype )
536523 tm .assert_series_equal (result , expected )
537524
@@ -552,8 +539,7 @@ def test_replace_compiled_regex_unicode(any_string_dtype):
552539 ser = Series ([b"abcd,\xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
553540 expected = Series ([b"abcd, \xc3 \xa0 " .decode ("utf-8" )], dtype = any_string_dtype )
554541 pat = re .compile (r"(?<=\w),(?=\w)" , flags = re .UNICODE )
555- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
556- result = ser .str .replace (pat , ", " , regex = True )
542+ result = ser .str .replace (pat , ", " , regex = True )
557543 tm .assert_series_equal (result , expected )
558544
559545
@@ -580,8 +566,7 @@ def test_replace_compiled_regex_callable(any_string_dtype):
580566 ser = Series (["fooBAD__barBAD" , np .nan ], dtype = any_string_dtype )
581567 repl = lambda m : m .group (0 ).swapcase ()
582568 pat = re .compile ("[a-z][A-Z]{2}" )
583- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
584- result = ser .str .replace (pat , repl , n = 2 , regex = True )
569+ result = ser .str .replace (pat , repl , n = 2 , regex = True )
585570 expected = Series (["foObaD__baRbaD" , np .nan ], dtype = any_string_dtype )
586571 tm .assert_series_equal (result , expected )
587572
@@ -629,8 +614,7 @@ def test_replace_moar(any_string_dtype):
629614 )
630615 tm .assert_series_equal (result , expected )
631616
632- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
633- result = ser .str .replace ("A" , "YYY" , case = False )
617+ result = ser .str .replace ("A" , "YYY" , case = False )
634618 expected = Series (
635619 [
636620 "YYY" ,
@@ -648,8 +632,7 @@ def test_replace_moar(any_string_dtype):
648632 )
649633 tm .assert_series_equal (result , expected )
650634
651- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
652- result = ser .str .replace ("^.a|dog" , "XX-XX " , case = False , regex = True )
635+ result = ser .str .replace ("^.a|dog" , "XX-XX " , case = False , regex = True )
653636 expected = Series (
654637 [
655638 "A" ,
@@ -672,13 +655,11 @@ def test_replace_not_case_sensitive_not_regex(any_string_dtype):
672655 # https://github.com/pandas-dev/pandas/issues/41602
673656 ser = Series (["A." , "a." , "Ab" , "ab" , np .nan ], dtype = any_string_dtype )
674657
675- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
676- result = ser .str .replace ("a" , "c" , case = False , regex = False )
658+ result = ser .str .replace ("a" , "c" , case = False , regex = False )
677659 expected = Series (["c." , "c." , "cb" , "cb" , np .nan ], dtype = any_string_dtype )
678660 tm .assert_series_equal (result , expected )
679661
680- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
681- result = ser .str .replace ("a." , "c." , case = False , regex = False )
662+ result = ser .str .replace ("a." , "c." , case = False , regex = False )
682663 expected = Series (["c." , "c." , "Ab" , "ab" , np .nan ], dtype = any_string_dtype )
683664 tm .assert_series_equal (result , expected )
684665
@@ -850,8 +831,7 @@ def test_fullmatch_case_kwarg(any_string_dtype):
850831 result = ser .str .fullmatch ("ab" , case = False )
851832 tm .assert_series_equal (result , expected )
852833
853- with tm .maybe_produces_warning (PerformanceWarning , using_pyarrow (any_string_dtype )):
854- result = ser .str .fullmatch ("ab" , flags = re .IGNORECASE )
834+ result = ser .str .fullmatch ("ab" , flags = re .IGNORECASE )
855835 tm .assert_series_equal (result , expected )
856836
857837
@@ -1036,17 +1016,13 @@ def test_flags_kwarg(any_string_dtype):
10361016
10371017 pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})"
10381018
1039- use_pyarrow = using_pyarrow (any_string_dtype )
1040-
10411019 result = data .str .extract (pat , flags = re .IGNORECASE , expand = True )
10421020 assert result .iloc [0 ].tolist () == ["dave" , "google" , "com" ]
10431021
1044- with tm .maybe_produces_warning (PerformanceWarning , use_pyarrow ):
1045- result = data .str .match (pat , flags = re .IGNORECASE )
1022+ result = data .str .match (pat , flags = re .IGNORECASE )
10461023 assert result .iloc [0 ]
10471024
1048- with tm .maybe_produces_warning (PerformanceWarning , use_pyarrow ):
1049- result = data .str .fullmatch (pat , flags = re .IGNORECASE )
1025+ result = data .str .fullmatch (pat , flags = re .IGNORECASE )
10501026 assert result .iloc [0 ]
10511027
10521028 result = data .str .findall (pat , flags = re .IGNORECASE )
@@ -1056,8 +1032,6 @@ def test_flags_kwarg(any_string_dtype):
10561032 assert result .iloc [0 ] == 1
10571033
10581034 msg = "has match groups"
1059- with tm .assert_produces_warning (
1060- UserWarning , match = msg , raise_on_extra_warnings = not use_pyarrow
1061- ):
1035+ with tm .assert_produces_warning (UserWarning , match = msg ):
10621036 result = data .str .contains (pat , flags = re .IGNORECASE )
10631037 assert result .iloc [0 ]
0 commit comments