TST: Speed up slow tests (#62885)

mroeschke · web-flow · commit 235e6ff41c14 · 2025-10-28T23:32:19.000Z
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
@@ -836,9 +836,9 @@ def test_to_csv_dups_cols2(self, temp_file):
         result = result.rename(columns={"a.1": "a"})
         tm.assert_frame_equal(result, df)
 
-    @pytest.mark.parametrize("chunksize", [10000, 50000, 100000])
+    @pytest.mark.parametrize("chunksize", [1, 5, 10])
     def test_to_csv_chunking(self, chunksize, temp_file):
-        aa = DataFrame({"A": range(100000)})
+        aa = DataFrame({"A": range(10)})
         aa["B"] = aa.A + 1.0
         aa["C"] = aa.A + 2.0
         aa["D"] = aa.A + 3.0
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
@@ -394,8 +394,20 @@ def test_groupby_drop_nan_with_multi_index():
     tm.assert_frame_equal(result, expected)
 
 
-# sequence_index enumerates all strings made up of x, y, z of length 4
-@pytest.mark.parametrize("sequence_index", range(3**4))
+# Grouper sequences over x, y, z: y > x and z is the missing value
+@pytest.mark.parametrize(
+    "sequence",
+    [
+        "xyzy",
+        "xxyz",
+        "yzxz",
+        "zzzz",
+        "zyzx",
+        "yyyy",
+        "zzxy",
+        "xyxy",
+    ],
+)
 @pytest.mark.parametrize(
     "dtype",
     [
@@ -419,15 +431,9 @@ def test_groupby_drop_nan_with_multi_index():
     ],
 )
 @pytest.mark.parametrize("test_series", [True, False])
-def test_no_sort_keep_na(sequence_index, dtype, test_series, as_index):
+def test_no_sort_keep_na(sequence, dtype, test_series, as_index):
     # GH#46584, GH#48794
 
-    # Convert sequence_index into a string sequence, e.g. 5 becomes "xxyz"
-    # This sequence is used for the grouper.
-    sequence = "".join(
-        [{0: "x", 1: "y", 2: "z"}[sequence_index // (3**k) % 3] for k in range(4)]
-    )
-
     # Unique values to use for grouper, depends on dtype
     if dtype in ("string", "string[pyarrow]"):
         uniques = {"x": "x", "y": "y", "z": pd.NA}
diff --git a/pandas/tests/indexes/datetimes/methods/test_tz_localize.py b/pandas/tests/indexes/datetimes/methods/test_tz_localize.py
@@ -149,23 +149,23 @@ def test_dti_tz_localize_pass_dates_to_utc(self, tzstr):
     @pytest.mark.parametrize("prefix", ["", "dateutil/"])
     def test_dti_tz_localize(self, prefix):
         tzstr = prefix + "US/Eastern"
-        dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="ms")
+        dti = date_range(start="1/1/2005", end="1/1/2005 0:00:02.256", freq="ms")
         dti2 = dti.tz_localize(tzstr)
 
         dti_utc = date_range(
-            start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="ms", tz="utc"
+            start="1/1/2005 05:00", end="1/1/2005 5:00:02.256", freq="ms", tz="utc"
         )
 
         tm.assert_numpy_array_equal(dti2.values, dti_utc.values)
 
         dti3 = dti2.tz_convert(prefix + "US/Pacific")
         tm.assert_numpy_array_equal(dti3.values, dti_utc.values)
 
-        dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms")
+        dti = date_range(start="11/6/2011 1:59:59", end="11/6/2011 2:00", freq="ms")
         with pytest.raises(ValueError, match="Cannot infer dst time"):
             dti.tz_localize(tzstr)
 
-        dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms")
+        dti = date_range(start="3/13/2011 1:59:59", end="3/13/2011 2:00", freq="ms")
         with pytest.raises(ValueError, match="2011-03-13 02:00:00"):
             dti.tz_localize(tzstr)
 
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
@@ -4,7 +4,6 @@
     datetime,
     timedelta,
 )
-import itertools
 
 import numpy as np
 import pytest
@@ -23,36 +22,6 @@
 ###############################################################
 
 
-@pytest.fixture(autouse=True, scope="class")
-def check_comprehensiveness(request):
-    # Iterate over combination of dtype, method and klass
-    # and ensure that each are contained within a collected test
-    cls = request.cls
-    combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])
-
-    def has_test(combo):
-        klass, dtype, method = combo
-        cls_funcs = request.node.session.items
-        return any(
-            klass in x.name and dtype in x.name and method in x.name for x in cls_funcs
-        )
-
-    opts = request.config.option
-    if opts.lf or opts.keyword:
-        # If we are running with "last-failed" or -k foo, we expect to only
-        #  run a subset of tests.
-        yield
-
-    else:
-        for combo in combos:
-            if not has_test(combo):
-                raise AssertionError(
-                    f"test method is not defined: {cls.__name__}, {combo}"
-                )
-
-        yield
-
-
 class CoercionBase:
     klasses = ["index", "series"]
     dtypes = [
@@ -541,10 +510,6 @@ class TestFillnaSeriesCoercion(CoercionBase):
 
     method = "fillna"
 
-    @pytest.mark.xfail(reason="Test not implemented")
-    def test_has_comprehensive_tests(self):
-        raise NotImplementedError
-
     def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
         """test coercion triggered by fillna"""
         target = original.copy()
@@ -823,7 +788,7 @@ def replacer(self, how, from_key, to_key):
             raise ValueError
         return replacer
 
-    def test_replace_series(self, how, to_key, from_key, replacer):
+    def test_replace_series(self, to_key, from_key, replacer):
         index = pd.Index([3, 4], name="xxx")
         obj = pd.Series(self.rep[from_key], index=index, name="yyy")
         obj = obj.astype(from_key)
@@ -860,7 +825,7 @@ def test_replace_series(self, how, to_key, from_key, replacer):
         "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
     )
     def test_replace_series_datetime_tz(
-        self, how, to_key, from_key, replacer, using_infer_string
+        self, to_key, from_key, replacer, using_infer_string
     ):
         index = pd.Index([3, 4], name="xyz")
         obj = pd.Series(self.rep[from_key], index=index, name="yyy").dt.as_unit("ns")
@@ -885,7 +850,7 @@ def test_replace_series_datetime_tz(
         ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
         indirect=True,
     )
-    def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer):
+    def test_replace_series_datetime_datetime(self, to_key, from_key, replacer):
         index = pd.Index([3, 4], name="xyz")
         obj = pd.Series(self.rep[from_key], index=index, name="yyy").dt.as_unit("ns")
         assert obj.dtype == from_key
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -1027,6 +1027,7 @@ def test_loc_non_unique(self):
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.arm_slow
+    @pytest.mark.slow
     @pytest.mark.parametrize("length, l2", [[900, 100], [900000, 100000]])
     def test_loc_non_unique_memory_error(self, length, l2):
         # GH 4280
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -1340,25 +1340,26 @@ def _convert_categorical(from_frame: DataFrame) -> DataFrame:
         return from_frame
 
     def test_iterator(self, datapath):
-        fname = datapath("io", "data", "stata", "stata3_117.dta")
+        fname = datapath("io", "data", "stata", "stata12_117.dta")
 
         parsed = read_stata(fname)
+        expected = parsed.iloc[0:5, :]
 
         with read_stata(fname, iterator=True) as itr:
             chunk = itr.read(5)
-            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+            tm.assert_frame_equal(expected, chunk)
 
         with read_stata(fname, chunksize=5) as itr:
-            chunk = list(itr)
-            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk[0])
+            chunk = next(itr)
+            tm.assert_frame_equal(expected, chunk)
 
         with read_stata(fname, iterator=True) as itr:
             chunk = itr.get_chunk(5)
-            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+            tm.assert_frame_equal(expected, chunk)
 
         with read_stata(fname, chunksize=5) as itr:
             chunk = itr.get_chunk()
-            tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+            tm.assert_frame_equal(expected, chunk)
 
         # GH12153
         with read_stata(fname, chunksize=4) as itr:
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
@@ -98,9 +98,9 @@ def test_asfreq_fill_value(index):
 @pytest.mark.parametrize(
     "index",
     [
-        timedelta_range("1 day", "10 day", freq="D"),
-        date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
-        period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
+        timedelta_range("1 day", "3 day", freq="D"),
+        date_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
+        period_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
     ],
 )
 def test_resample_interpolate(index):
diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
@@ -89,6 +89,7 @@ def test_int64_overflow_groupby_large_range(self):
         grouped = data.groupby(["a", "b", "c", "d"])
         assert len(grouped) == len(values)
 
+    @pytest.mark.slow
     @pytest.mark.parametrize("agg", ["mean", "median"])
     def test_int64_overflow_groupby_large_df_shuffled(self, agg):
         rs = np.random.default_rng(2)
diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py
@@ -68,8 +68,8 @@ def test_tz_localize_to_utc_copies():
 
 def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture):
     tz = tz_aware_fixture
-    tz_didx = date_range("2014-03-01", "2015-01-10", freq="h", tz=tz)
-    naive_didx = date_range("2014-03-01", "2015-01-10", freq="h")
+    tz_didx = date_range("2014-03-01", "2014-04-01", freq="h", tz=tz)
+    naive_didx = date_range("2014-03-01", "2014-04-01", freq="h")
 
     _compare_utc_to_local(tz_didx)
     _compare_local_to_utc(tz_didx, naive_didx)
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
@@ -6,7 +6,6 @@
 import re
 
 from dateutil.parser import parse as du_parse
-from hypothesis import given
 import numpy as np
 import pytest
 
@@ -30,7 +29,6 @@
     option_context,
 )
 import pandas._testing as tm
-from pandas._testing._hypothesis import DATETIME_NO_TZ
 
 
 @pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
@@ -391,42 +389,6 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
     return msg, result
 
 
-@pytest.mark.slow
-@given(DATETIME_NO_TZ)
-@pytest.mark.parametrize("delimiter", list(" -./"))
-@pytest.mark.parametrize("dayfirst", [True, False])
-@pytest.mark.parametrize(
-    "date_format",
-    ["%d %m %Y", "%m %d %Y", "%m %Y", "%Y %m %d", "%y %m %d", "%Y%m%d", "%y%m%d"],
-)
-def test_hypothesis_delimited_date(
-    request, date_format, dayfirst, delimiter, test_datetime
-):
-    if date_format == "%m %Y" and delimiter == ".":
-        request.applymarker(
-            pytest.mark.xfail(
-                reason="parse_datetime_string cannot reliably tell whether "
-                "e.g. %m.%Y is a float or a date",
-                strict=False,
-            )
-        )
-    date_string = test_datetime.strftime(date_format.replace(" ", delimiter))
-
-    except_out_dateutil, result = _helper_hypothesis_delimited_date(
-        parsing.py_parse_datetime_string, date_string, dayfirst=dayfirst
-    )
-    except_in_dateutil, expected = _helper_hypothesis_delimited_date(
-        du_parse,
-        date_string,
-        default=datetime(1, 1, 1),
-        dayfirst=dayfirst,
-        yearfirst=False,
-    )
-
-    assert except_out_dateutil == except_in_dateutil
-    assert result == expected
-
-
 @pytest.mark.parametrize("input", ["21-01-01", "01-01-21"])
 @pytest.mark.parametrize("dayfirst", [True, False])
 def test_parse_datetime_string_with_reso_dayfirst(dayfirst, input):

Original file line number	Diff line number	Diff line change
`@@ -98,9 +98,9 @@ def test_asfreq_fill_value(index):`
`98`	`98`	`@pytest.mark.parametrize(`
`99`	`99`	`"index",`
`100`	`100`	`[`
`101`		`- timedelta_range("1 day", "10 day", freq="D"),`
`102`		`- date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),`
`103`		`- period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),`
	`101`	`+ timedelta_range("1 day", "3 day", freq="D"),`
	`102`	`+ date_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),`
	`103`	`+ period_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),`
`104`	`104`	`],`
`105`	`105`	`)`
`106`	`106`	`def test_resample_interpolate(index):`