Skip to content

Commit 235e6ff

Browse files
authored
TST: Speed up slow tests (#62885)
1 parent 4c71a22 commit 235e6ff

File tree

10 files changed

+38
-102
lines changed

10 files changed

+38
-102
lines changed

pandas/tests/frame/methods/test_to_csv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -836,9 +836,9 @@ def test_to_csv_dups_cols2(self, temp_file):
836836
result = result.rename(columns={"a.1": "a"})
837837
tm.assert_frame_equal(result, df)
838838

839-
@pytest.mark.parametrize("chunksize", [10000, 50000, 100000])
839+
@pytest.mark.parametrize("chunksize", [1, 5, 10])
840840
def test_to_csv_chunking(self, chunksize, temp_file):
841-
aa = DataFrame({"A": range(100000)})
841+
aa = DataFrame({"A": range(10)})
842842
aa["B"] = aa.A + 1.0
843843
aa["C"] = aa.A + 2.0
844844
aa["D"] = aa.A + 3.0

pandas/tests/groupby/test_groupby_dropna.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,20 @@ def test_groupby_drop_nan_with_multi_index():
394394
tm.assert_frame_equal(result, expected)
395395

396396

397-
# sequence_index enumerates all strings made up of x, y, z of length 4
398-
@pytest.mark.parametrize("sequence_index", range(3**4))
397+
# y > x and z is the missing value
398+
@pytest.mark.parametrize(
399+
"sequence",
400+
[
401+
"xyzy",
402+
"xxyz",
403+
"yzxz",
404+
"zzzz",
405+
"zyzx",
406+
"yyyy",
407+
"zzxy",
408+
"xyxy",
409+
],
410+
)
399411
@pytest.mark.parametrize(
400412
"dtype",
401413
[
@@ -419,15 +431,9 @@ def test_groupby_drop_nan_with_multi_index():
419431
],
420432
)
421433
@pytest.mark.parametrize("test_series", [True, False])
422-
def test_no_sort_keep_na(sequence_index, dtype, test_series, as_index):
434+
def test_no_sort_keep_na(sequence, dtype, test_series, as_index):
423435
# GH#46584, GH#48794
424436

425-
# Convert sequence_index into a string sequence, e.g. 5 becomes "xxyz"
426-
# This sequence is used for the grouper.
427-
sequence = "".join(
428-
[{0: "x", 1: "y", 2: "z"}[sequence_index // (3**k) % 3] for k in range(4)]
429-
)
430-
431437
# Unique values to use for grouper, depends on dtype
432438
if dtype in ("string", "string[pyarrow]"):
433439
uniques = {"x": "x", "y": "y", "z": pd.NA}

pandas/tests/indexes/datetimes/methods/test_tz_localize.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,23 +149,23 @@ def test_dti_tz_localize_pass_dates_to_utc(self, tzstr):
149149
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
150150
def test_dti_tz_localize(self, prefix):
151151
tzstr = prefix + "US/Eastern"
152-
dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="ms")
152+
dti = date_range(start="1/1/2005", end="1/1/2005 0:00:02.256", freq="ms")
153153
dti2 = dti.tz_localize(tzstr)
154154

155155
dti_utc = date_range(
156-
start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="ms", tz="utc"
156+
start="1/1/2005 05:00", end="1/1/2005 5:00:02.256", freq="ms", tz="utc"
157157
)
158158

159159
tm.assert_numpy_array_equal(dti2.values, dti_utc.values)
160160

161161
dti3 = dti2.tz_convert(prefix + "US/Pacific")
162162
tm.assert_numpy_array_equal(dti3.values, dti_utc.values)
163163

164-
dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms")
164+
dti = date_range(start="11/6/2011 1:59:59", end="11/6/2011 2:00", freq="ms")
165165
with pytest.raises(ValueError, match="Cannot infer dst time"):
166166
dti.tz_localize(tzstr)
167167

168-
dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms")
168+
dti = date_range(start="3/13/2011 1:59:59", end="3/13/2011 2:00", freq="ms")
169169
with pytest.raises(ValueError, match="2011-03-13 02:00:00"):
170170
dti.tz_localize(tzstr)
171171

pandas/tests/indexing/test_coercion.py

Lines changed: 3 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
datetime,
55
timedelta,
66
)
7-
import itertools
87

98
import numpy as np
109
import pytest
@@ -23,36 +22,6 @@
2322
###############################################################
2423

2524

26-
@pytest.fixture(autouse=True, scope="class")
27-
def check_comprehensiveness(request):
28-
# Iterate over combination of dtype, method and klass
29-
# and ensure that each are contained within a collected test
30-
cls = request.cls
31-
combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])
32-
33-
def has_test(combo):
34-
klass, dtype, method = combo
35-
cls_funcs = request.node.session.items
36-
return any(
37-
klass in x.name and dtype in x.name and method in x.name for x in cls_funcs
38-
)
39-
40-
opts = request.config.option
41-
if opts.lf or opts.keyword:
42-
# If we are running with "last-failed" or -k foo, we expect to only
43-
# run a subset of tests.
44-
yield
45-
46-
else:
47-
for combo in combos:
48-
if not has_test(combo):
49-
raise AssertionError(
50-
f"test method is not defined: {cls.__name__}, {combo}"
51-
)
52-
53-
yield
54-
55-
5625
class CoercionBase:
5726
klasses = ["index", "series"]
5827
dtypes = [
@@ -541,10 +510,6 @@ class TestFillnaSeriesCoercion(CoercionBase):
541510

542511
method = "fillna"
543512

544-
@pytest.mark.xfail(reason="Test not implemented")
545-
def test_has_comprehensive_tests(self):
546-
raise NotImplementedError
547-
548513
def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
549514
"""test coercion triggered by fillna"""
550515
target = original.copy()
@@ -823,7 +788,7 @@ def replacer(self, how, from_key, to_key):
823788
raise ValueError
824789
return replacer
825790

826-
def test_replace_series(self, how, to_key, from_key, replacer):
791+
def test_replace_series(self, to_key, from_key, replacer):
827792
index = pd.Index([3, 4], name="xxx")
828793
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
829794
obj = obj.astype(from_key)
@@ -860,7 +825,7 @@ def test_replace_series(self, how, to_key, from_key, replacer):
860825
"from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
861826
)
862827
def test_replace_series_datetime_tz(
863-
self, how, to_key, from_key, replacer, using_infer_string
828+
self, to_key, from_key, replacer, using_infer_string
864829
):
865830
index = pd.Index([3, 4], name="xyz")
866831
obj = pd.Series(self.rep[from_key], index=index, name="yyy").dt.as_unit("ns")
@@ -885,7 +850,7 @@ def test_replace_series_datetime_tz(
885850
["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
886851
indirect=True,
887852
)
888-
def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer):
853+
def test_replace_series_datetime_datetime(self, to_key, from_key, replacer):
889854
index = pd.Index([3, 4], name="xyz")
890855
obj = pd.Series(self.rep[from_key], index=index, name="yyy").dt.as_unit("ns")
891856
assert obj.dtype == from_key

pandas/tests/indexing/test_loc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,7 @@ def test_loc_non_unique(self):
10271027
tm.assert_frame_equal(result, expected)
10281028

10291029
@pytest.mark.arm_slow
1030+
@pytest.mark.slow
10301031
@pytest.mark.parametrize("length, l2", [[900, 100], [900000, 100000]])
10311032
def test_loc_non_unique_memory_error(self, length, l2):
10321033
# GH 4280

pandas/tests/io/test_stata.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,25 +1340,26 @@ def _convert_categorical(from_frame: DataFrame) -> DataFrame:
13401340
return from_frame
13411341

13421342
def test_iterator(self, datapath):
1343-
fname = datapath("io", "data", "stata", "stata3_117.dta")
1343+
fname = datapath("io", "data", "stata", "stata12_117.dta")
13441344

13451345
parsed = read_stata(fname)
1346+
expected = parsed.iloc[0:5, :]
13461347

13471348
with read_stata(fname, iterator=True) as itr:
13481349
chunk = itr.read(5)
1349-
tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
1350+
tm.assert_frame_equal(expected, chunk)
13501351

13511352
with read_stata(fname, chunksize=5) as itr:
1352-
chunk = list(itr)
1353-
tm.assert_frame_equal(parsed.iloc[0:5, :], chunk[0])
1353+
chunk = next(itr)
1354+
tm.assert_frame_equal(expected, chunk)
13541355

13551356
with read_stata(fname, iterator=True) as itr:
13561357
chunk = itr.get_chunk(5)
1357-
tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
1358+
tm.assert_frame_equal(expected, chunk)
13581359

13591360
with read_stata(fname, chunksize=5) as itr:
13601361
chunk = itr.get_chunk()
1361-
tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
1362+
tm.assert_frame_equal(expected, chunk)
13621363

13631364
# GH12153
13641365
with read_stata(fname, chunksize=4) as itr:

pandas/tests/resample/test_base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,9 @@ def test_asfreq_fill_value(index):
9898
@pytest.mark.parametrize(
9999
"index",
100100
[
101-
timedelta_range("1 day", "10 day", freq="D"),
102-
date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
103-
period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
101+
timedelta_range("1 day", "3 day", freq="D"),
102+
date_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
103+
period_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
104104
],
105105
)
106106
def test_resample_interpolate(index):

pandas/tests/test_sorting.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def test_int64_overflow_groupby_large_range(self):
8989
grouped = data.groupby(["a", "b", "c", "d"])
9090
assert len(grouped) == len(values)
9191

92+
@pytest.mark.slow
9293
@pytest.mark.parametrize("agg", ["mean", "median"])
9394
def test_int64_overflow_groupby_large_df_shuffled(self, agg):
9495
rs = np.random.default_rng(2)

pandas/tests/tslibs/test_conversion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ def test_tz_localize_to_utc_copies():
6868

6969
def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture):
7070
tz = tz_aware_fixture
71-
tz_didx = date_range("2014-03-01", "2015-01-10", freq="h", tz=tz)
72-
naive_didx = date_range("2014-03-01", "2015-01-10", freq="h")
71+
tz_didx = date_range("2014-03-01", "2014-04-01", freq="h", tz=tz)
72+
naive_didx = date_range("2014-03-01", "2014-04-01", freq="h")
7373

7474
_compare_utc_to_local(tz_didx)
7575
_compare_local_to_utc(tz_didx, naive_didx)

pandas/tests/tslibs/test_parsing.py

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import re
77

88
from dateutil.parser import parse as du_parse
9-
from hypothesis import given
109
import numpy as np
1110
import pytest
1211

@@ -30,7 +29,6 @@
3029
option_context,
3130
)
3231
import pandas._testing as tm
33-
from pandas._testing._hypothesis import DATETIME_NO_TZ
3432

3533

3634
@pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
@@ -391,42 +389,6 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
391389
return msg, result
392390

393391

394-
@pytest.mark.slow
395-
@given(DATETIME_NO_TZ)
396-
@pytest.mark.parametrize("delimiter", list(" -./"))
397-
@pytest.mark.parametrize("dayfirst", [True, False])
398-
@pytest.mark.parametrize(
399-
"date_format",
400-
["%d %m %Y", "%m %d %Y", "%m %Y", "%Y %m %d", "%y %m %d", "%Y%m%d", "%y%m%d"],
401-
)
402-
def test_hypothesis_delimited_date(
403-
request, date_format, dayfirst, delimiter, test_datetime
404-
):
405-
if date_format == "%m %Y" and delimiter == ".":
406-
request.applymarker(
407-
pytest.mark.xfail(
408-
reason="parse_datetime_string cannot reliably tell whether "
409-
"e.g. %m.%Y is a float or a date",
410-
strict=False,
411-
)
412-
)
413-
date_string = test_datetime.strftime(date_format.replace(" ", delimiter))
414-
415-
except_out_dateutil, result = _helper_hypothesis_delimited_date(
416-
parsing.py_parse_datetime_string, date_string, dayfirst=dayfirst
417-
)
418-
except_in_dateutil, expected = _helper_hypothesis_delimited_date(
419-
du_parse,
420-
date_string,
421-
default=datetime(1, 1, 1),
422-
dayfirst=dayfirst,
423-
yearfirst=False,
424-
)
425-
426-
assert except_out_dateutil == except_in_dateutil
427-
assert result == expected
428-
429-
430392
@pytest.mark.parametrize("input", ["21-01-01", "01-01-21"])
431393
@pytest.mark.parametrize("dayfirst", [True, False])
432394
def test_parse_datetime_string_with_reso_dayfirst(dayfirst, input):

0 commit comments

Comments
 (0)