6565from pandas .core .arrays .arrow .extension_types import ArrowPeriodType
6666
6767
def _require_timezone_database(request):
    """
    Flag the calling test as an expected failure on Windows CI.

    When both ``is_platform_windows()`` and ``is_ci_environment()`` are true,
    an ``xfail`` marker that expects ``pa.ArrowInvalid`` is attached to
    ``request.node``. In every other environment this is a no-op.

    Parameters
    ----------
    request
        The pytest ``request`` object of the calling test; only
        ``request.node.add_marker`` is used.
    """
    # Guard clause: nothing to mark unless we are on Windows *and* on CI.
    if not (is_platform_windows() and is_ci_environment()):
        return

    reason = (
        "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
        "on CI to path to the tzdata for pyarrow."
    )
    request.node.add_marker(pytest.mark.xfail(raises=pa.ArrowInvalid, reason=reason))
78+
79+
@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str)
def dtype(request):
    """
    Build an ``ArrowDtype`` from the current fixture parameter.

    The fixture is parametrized over ``tm.ALL_PYARROW_DTYPES``; each parameter
    is passed to ``ArrowDtype`` as ``pyarrow_dtype``.
    """
    pyarrow_type = request.param
    return ArrowDtype(pyarrow_dtype=pyarrow_type)
@@ -314,16 +326,8 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
314326 )
315327 )
316328 elif pa .types .is_timestamp (pa_dtype ) and pa_dtype .tz is not None :
317- if is_platform_windows () and is_ci_environment ():
318- request .node .add_marker (
319- pytest .mark .xfail (
320- raises = pa .ArrowInvalid ,
321- reason = (
322- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
323- "on CI to path to the tzdata for pyarrow."
324- ),
325- )
326- )
329+ _require_timezone_database (request )
330+
327331 pa_array = data ._pa_array .cast (pa .string ())
328332 result = type (data )._from_sequence_of_strings (pa_array , dtype = data .dtype )
329333 tm .assert_extension_array_equal (result , data )
@@ -795,20 +799,6 @@ def test_value_counts_returns_pyarrow_int64(self, data):
795799 result = data .value_counts ()
796800 assert result .dtype == ArrowDtype (pa .int64 ())
797801
798- def test_value_counts_with_normalize (self , data , request ):
799- data = data [:10 ].unique ()
800- values = np .array (data [~ data .isna ()])
801- ser = pd .Series (data , dtype = data .dtype )
802-
803- result = ser .value_counts (normalize = True ).sort_index ()
804-
805- expected = pd .Series (
806- [1 / len (values )] * len (values ), index = result .index , name = "proportion"
807- )
808- expected = expected .astype ("double[pyarrow]" )
809-
810- self .assert_series_equal (result , expected )
811-
812802 def test_argmin_argmax (
813803 self , data_for_sorting , data_missing_for_sorting , na_value , request
814804 ):
@@ -865,10 +855,6 @@ def test_combine_add(self, data_repeated, request):
865855 else :
866856 super ().test_combine_add (data_repeated )
867857
868- def test_basic_equals (self , data ):
869- # https://github.com/pandas-dev/pandas/issues/34660
870- assert pd .Series (data ).equals (pd .Series (data ))
871-
872858
873859class TestBaseArithmeticOps (base .BaseArithmeticOpsTests ):
874860 divmod_exc = NotImplementedError
@@ -2563,33 +2549,17 @@ def test_dt_isocalendar():
25632549)
25642550def test_dt_day_month_name (method , exp , request ):
25652551 # GH 52388
2566- if is_platform_windows () and is_ci_environment ():
2567- request .node .add_marker (
2568- pytest .mark .xfail (
2569- raises = pa .ArrowInvalid ,
2570- reason = (
2571- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
2572- "on CI to path to the tzdata for pyarrow."
2573- ),
2574- )
2575- )
2552+ _require_timezone_database (request )
2553+
25762554 ser = pd .Series ([datetime (2023 , 1 , 1 ), None ], dtype = ArrowDtype (pa .timestamp ("ms" )))
25772555 result = getattr (ser .dt , method )()
25782556 expected = pd .Series ([exp , None ], dtype = ArrowDtype (pa .string ()))
25792557 tm .assert_series_equal (result , expected )
25802558
25812559
25822560def test_dt_strftime (request ):
2583- if is_platform_windows () and is_ci_environment ():
2584- request .node .add_marker (
2585- pytest .mark .xfail (
2586- raises = pa .ArrowInvalid ,
2587- reason = (
2588- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
2589- "on CI to path to the tzdata for pyarrow."
2590- ),
2591- )
2592- )
2561+ _require_timezone_database (request )
2562+
25932563 ser = pd .Series (
25942564 [datetime (year = 2023 , month = 1 , day = 2 , hour = 3 ), None ],
25952565 dtype = ArrowDtype (pa .timestamp ("ns" )),
@@ -2700,16 +2670,8 @@ def test_dt_tz_localize_none():
27002670
27012671@pytest .mark .parametrize ("unit" , ["us" , "ns" ])
27022672def test_dt_tz_localize (unit , request ):
2703- if is_platform_windows () and is_ci_environment ():
2704- request .node .add_marker (
2705- pytest .mark .xfail (
2706- raises = pa .ArrowInvalid ,
2707- reason = (
2708- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
2709- "on CI to path to the tzdata for pyarrow."
2710- ),
2711- )
2712- )
2673+ _require_timezone_database (request )
2674+
27132675 ser = pd .Series (
27142676 [datetime (year = 2023 , month = 1 , day = 2 , hour = 3 ), None ],
27152677 dtype = ArrowDtype (pa .timestamp (unit )),
@@ -2731,16 +2693,8 @@ def test_dt_tz_localize(unit, request):
27312693 ],
27322694)
27332695def test_dt_tz_localize_nonexistent (nonexistent , exp_date , request ):
2734- if is_platform_windows () and is_ci_environment ():
2735- request .node .add_marker (
2736- pytest .mark .xfail (
2737- raises = pa .ArrowInvalid ,
2738- reason = (
2739- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
2740- "on CI to path to the tzdata for pyarrow."
2741- ),
2742- )
2743- )
2696+ _require_timezone_database (request )
2697+
27442698 ser = pd .Series (
27452699 [datetime (year = 2023 , month = 3 , day = 12 , hour = 2 , minute = 30 ), None ],
27462700 dtype = ArrowDtype (pa .timestamp ("ns" )),
0 commit comments