|
30 | 30 | Y = np.array([1, 0, 1, 0, 1, 1, 1, 1, 0, 1]) |
31 | 31 |
|
32 | 32 |
|
33 | | -@pytest.mark.parametrize("as_frame", [True, False], ids=['dataframe', 'array']) |
34 | | -def test_rus_fit_resample(as_frame): |
35 | | - if as_frame: |
36 | | - pd = pytest.importorskip("pandas") |
37 | | - X_ = pd.DataFrame(X) |
38 | | - else: |
39 | | - X_ = X |
40 | | - rus = RandomUnderSampler(random_state=RND_SEED, replacement=True) |
41 | | - X_resampled, y_resampled = rus.fit_resample(X_, Y) |
42 | | - |
43 | | - X_gt = np.array( |
44 | | - [ |
45 | | - [0.92923648, 0.76103773], |
46 | | - [0.47104475, 0.44386323], |
47 | | - [0.13347175, 0.12167502], |
48 | | - [0.09125309, -0.85409574], |
49 | | - [0.12372842, 0.6536186], |
50 | | - [0.04352327, -0.20515826], |
51 | | - ] |
52 | | - ) |
53 | | - y_gt = np.array([0, 0, 0, 1, 1, 1]) |
54 | | - |
55 | | - if as_frame: |
56 | | - assert hasattr(X_resampled, "loc") |
57 | | - X_resampled = X_resampled.to_numpy() |
58 | | - |
59 | | - assert_array_equal(X_resampled, X_gt) |
60 | | - assert_array_equal(y_resampled, y_gt) |
61 | | - |
62 | | - |
63 | | -def test_rus_fit_resample_half(): |
64 | | - sampling_strategy = {0: 3, 1: 6} |
65 | | - rus = RandomUnderSampler( |
66 | | - sampling_strategy=sampling_strategy, |
67 | | - random_state=RND_SEED, |
68 | | - replacement=True, |
69 | | - ) |
70 | | - X_resampled, y_resampled = rus.fit_resample(X, Y) |
71 | | - |
72 | | - X_gt = np.array( |
73 | | - [ |
74 | | - [0.92923648, 0.76103773], |
75 | | - [0.47104475, 0.44386323], |
76 | | - [0.92923648, 0.76103773], |
77 | | - [0.15490546, 0.3130677], |
78 | | - [0.15490546, 0.3130677], |
79 | | - [0.15490546, 0.3130677], |
80 | | - [0.20792588, 1.49407907], |
81 | | - [0.15490546, 0.3130677], |
82 | | - [0.12372842, 0.6536186], |
83 | | - ] |
84 | | - ) |
85 | | - y_gt = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1]) |
86 | | - assert_array_equal(X_resampled, X_gt) |
87 | | - assert_array_equal(y_resampled, y_gt) |
| 33 | +@pytest.mark.parametrize( |
| 34 | + "sampling_strategy, expected_counts", |
| 35 | + [ |
| 36 | + ("auto", {0: 3, 1: 3}), |
| 37 | + ({0: 3, 1: 6}, {0: 3, 1: 6}), |
| 38 | + ] |
| 39 | +) |
| 40 | +def test_rus_fit_resample(sampling_strategy, expected_counts): |
| 41 | + rus = RandomUnderSampler(sampling_strategy=sampling_strategy) |
| 42 | + X_res, y_res = rus.fit_resample(X, Y) |
| 43 | + |
| 44 | + # check that no sample from class 0 is resampled as class 1 and |
| 45 | + # vice-versa |
| 46 | + classes = [0, 1] |
| 47 | + for c0, c1 in (classes, classes[::-1]): |
| 48 | + X_c0 = X[Y == c0] |
| 49 | + X_c1 = X_res[y_res == c1] |
| 50 | + for s0 in X_c0: |
| 51 | + assert not np.isclose(s0, X_c1).all(axis=1).any() |
| 52 | + |
| 53 | + assert Counter(y_res) == expected_counts |
88 | 54 |
|
89 | 55 |
|
90 | 56 | def test_multiclass_fit_resample(): |
|
0 commit comments