2323 BytesIO ,
2424 StringIO ,
2525)
26+ from itertools import combinations
2627import operator
2728import pickle
2829import re
@@ -1933,13 +1934,18 @@ def test_str_fullmatch(pat, case, na, exp):
19331934
19341935
@pytest.mark.parametrize(
    "sub, start, end, exp, exp_type",
    [
        ["ab", 0, None, [0, None], pa.int32()],
        ["bc", 1, 3, [1, None], pa.int64()],
        ["ab", 1, 3, [-1, None], pa.int64()],
        ["ab", -3, -3, [-1, None], pa.int64()],
    ],
)
def test_str_find(sub, start, end, exp, exp_type):
    """Check ``Series.str.find`` on an Arrow-backed string Series.

    Parameters
    ----------
    sub : str
        Substring searched for in each element.
    start, end : int or None
        Bounds forwarded to ``str.find``.
    exp : list
        Expected values; the second entry is ``None`` because the second
        input element is missing.
    exp_type : pyarrow.DataType
        Arrow integer type the result is expected to carry for this case.
    """
    ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
    result = ser.str.find(sub, start=start, end=end)
    expected = pd.Series(exp, dtype=ArrowDtype(exp_type))
    tm.assert_series_equal(result, expected)
19441950
19451951
@@ -1951,10 +1957,70 @@ def test_str_find_negative_start():
19511957 tm .assert_series_equal (result , expected )
19521958
19531959
def test_str_find_no_end():
    """Check ``str.find`` with ``start=1`` and no ``end`` on Arrow strings.

    When ``pa_version_under13p0`` is true the call is expected to raise
    ``ArrowInvalid`` ("Negative buffer resize"); otherwise the result is
    expected to be ``[-1, None]`` with dtype ``int64[pyarrow]``.
    """
    ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
    if pa_version_under13p0:
        # https://github.com/apache/arrow/issues/36311
        with pytest.raises(pa.lib.ArrowInvalid, match="Negative buffer resize"):
            ser.str.find("ab", start=1)
    else:
        result = ser.str.find("ab", start=1)
        expected = pd.Series([-1, None], dtype="int64[pyarrow]")
        tm.assert_series_equal(result, expected)
1970+
1971+
def test_str_find_negative_start_negative_end():
    """Check ``str.find`` with negative ``start`` and ``end`` that bracket a match.

    Searching ``"abcdefg"`` for ``"d"`` with ``start=-6, end=-3`` is expected
    to report index 3; the missing element stays missing.
    """
    # GH 56791
    ser = pd.Series(["abcdefg", None], dtype=ArrowDtype(pa.string()))
    result = ser.str.find(sub="d", start=-6, end=-3)
    expected = pd.Series([3, None], dtype=ArrowDtype(pa.int64()))
    tm.assert_series_equal(result, expected)
1978+
1979+
def test_str_find_large_start():
    """Check ``str.find`` when ``start`` exceeds the string length.

    When ``pa_version_under13p0`` is true the call is expected to raise
    ``ArrowInvalid`` ("Negative buffer resize"); otherwise the result is
    expected to be ``[-1, None]`` as Arrow ``int64``.
    """
    # GH 56791
    ser = pd.Series(["abcdefg", None], dtype=ArrowDtype(pa.string()))
    if pa_version_under13p0:
        # https://github.com/apache/arrow/issues/36311
        with pytest.raises(pa.lib.ArrowInvalid, match="Negative buffer resize"):
            ser.str.find(sub="d", start=16)
    else:
        result = ser.str.find(sub="d", start=16)
        expected = pd.Series([-1, None], dtype=ArrowDtype(pa.int64()))
        tm.assert_series_equal(result, expected)
1991+
1992+
@pytest.mark.skipif(
    pa_version_under13p0, reason="https://github.com/apache/arrow/issues/36311"
)
@pytest.mark.parametrize("start", [*range(-15, 15), None])
@pytest.mark.parametrize("end", [*range(-15, 15), None])
@pytest.mark.parametrize(
    "sub",
    # Every non-empty contiguous slice of "abcaadef", plus the empty string
    # and two extra substrings.
    ["abcaadef"[x:y] for x, y in combinations(range(len("abcaadef") + 1), r=2)]
    + [
        "",
        "az",
        "abce",
    ],
)
def test_str_find_e2e(start, end, sub):
    """Compare Arrow-backed ``str.find`` against the ``pd.StringDtype`` result.

    The same strings are searched through an ``ArrowDtype(pa.string())``
    Series and through its ``pd.StringDtype()`` conversion; the latter result
    is cast to the Arrow result's dtype and the two must be equal for every
    ``start``/``end``/``sub`` combination.
    """
    s = pd.Series(
        ["abcaadef", "abc", "abcdeddefgj8292", "ab", "a", ""],
        dtype=ArrowDtype(pa.string()),
    )
    # Reference implementation: same data with the default string dtype.
    reference = s.astype(pd.StringDtype())
    result = s.str.find(sub, start, end)
    expected = reference.str.find(sub, start, end).astype(result.dtype)
    tm.assert_series_equal(result, expected)
2016+
2017+
def test_str_find_negative_start_negative_end_no_match():
    """Check ``str.find`` with a negative ``start`` that lies after ``end``.

    Searching ``"abcdefg"`` for ``"d"`` with ``start=-3, end=-6`` is expected
    to report -1; the missing element stays missing.
    """
    # GH 56791
    ser = pd.Series(["abcdefg", None], dtype=ArrowDtype(pa.string()))
    result = ser.str.find(sub="d", start=-3, end=-6)
    expected = pd.Series([-1, None], dtype=ArrowDtype(pa.int64()))
    tm.assert_series_equal(result, expected)
19582024
19592025
19602026@pytest .mark .parametrize (
0 commit comments