Skip to content

Commit 4b635cf

Browse files
bing-jmarcharper
authored and committed
strategy tests completed and passed; updated docstring to clarify initial behaviour.
1 parent e650cf2 commit 4b635cf

File tree

2 files changed

+63
-53
lines changed

2 files changed

+63
-53
lines changed

axelrod/strategies/epsilon_greedy.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ class EpsilonGreedy(Player):
99
Behaves greedily (chooses the optimal action) with a probability of 1 - epsilon,
1010
and chooses randomly between the actions with a probability of epsilon.
1111
12-
The optimal action is determined from the average payoff of each action in previous turns.
12+
The optimal action is determined from the average payoff of each action in previous turns;
13+
if initial rewards for each action are equivalent (true by default),
14+
then the optimal action for the first turn is cooperate.
1315
1416
Names:
1517
@@ -45,7 +47,7 @@ def __init__(
4547
4648
Special cases
4749
----------
48-
epsilon = 0 is equal to Random(0.5)
50+
When epsilon <= 0, this player behaves like Random(0.5)
4951
"""
5052
super().__init__()
5153
self.epsilon = epsilon
@@ -75,9 +77,9 @@ def update_rewards(self, opponent: Player):
7577
else:
7678
num_plays = self.history.defections
7779

78-
self._rewards[last_play] = self._rewards[last_play] + (
79-
1 / num_plays
80-
) * (last_score - self._rewards[last_play])
80+
self._rewards[last_play] = self._rewards[last_play] + (1 / num_plays) * (
81+
last_score - self._rewards[last_play]
82+
)
8183

8284
def strategy(self, opponent: Player) -> Action:
8385
"""Actual strategy definition that determines player's action."""

axelrod/tests/strategies/test_epsilon_greedy.py

Lines changed: 56 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
class TestEpsilonGreedy(TestPlayer):
1111

12-
name = "$\varepsilon$-greedy"
12+
name = "$\varepsilon$-greedy: 0.1, 0.0, 0.0"
1313
player = axl.EpsilonGreedy
1414
expected_classifier = {
1515
"memory_depth": float("inf"),
@@ -24,59 +24,67 @@ class TestEpsilonGreedy(TestPlayer):
2424
def test_deterministic(self):
2525
# cases where epsilon = 0
2626
actions = [(C, C), (C, C), (C, C)]
27-
self.versus_test(axl.Cooperator(),
28-
expected_actions=actions,
29-
init_kwargs={"epsilon": 0, "init_c_reward": 0, "init_d_reward": -1},
30-
attrs={"_rewards": {C: 3, D: -1}})
27+
self.versus_test(
28+
axl.Cooperator(),
29+
expected_actions=actions,
30+
init_kwargs={"epsilon": 0, "init_c_reward": 0, "init_d_reward": -1},
31+
attrs={"_rewards": {C: 3, D: -1}},
32+
)
3133

3234
actions = [(D, D), (D, D), (D, D)]
33-
self.versus_test(axl.Defector(),
34-
expected_actions=actions,
35-
init_kwargs={"epsilon": 0, "init_c_reward": -1, "init_d_reward": 0},
36-
attrs={"_rewards": {C: -1, D: 1}})
35+
self.versus_test(
36+
axl.Defector(),
37+
expected_actions=actions,
38+
init_kwargs={"epsilon": 0, "init_c_reward": -1, "init_d_reward": 0},
39+
attrs={"_rewards": {C: -1, D: 1}},
40+
)
3741

38-
# actions = [(D, C), (D, D)]
39-
# self.versus_test(axl.TitForTat(),
40-
# expected_actions=actions,
41-
# init_kwargs={"epsilon": 0, "init_c_reward": 3.2, "init_d_reward": 4},
42-
# attrs={"_rewards": {C: 3.2, D: 9}})
42+
actions = [(D, C), (D, D), (C, D)]
43+
self.versus_test(
44+
axl.TitForTat(),
45+
expected_actions=actions,
46+
init_kwargs={"epsilon": 0, "init_c_reward": 3.2, "init_d_reward": 4.0},
47+
attrs={"_rewards": {C: 3.2, D: 3.0}},
48+
)
4349

4450
def test_random(self):
45-
# case where epsilon = 1
51+
# cases where epsilon = 1
4652
opponent = axl.MockPlayer()
4753
actions = [(C, C), (D, C), (D, C), (C, C)]
48-
self.versus_test(opponent, expected_actions=actions, init_kwargs={"epsilon": 1}, seed=5)
54+
self.versus_test(
55+
opponent, expected_actions=actions, init_kwargs={"epsilon": 1}, seed=5
56+
)
4957

58+
opponent = axl.MockPlayer(actions=[C, D, C])
59+
actions = [(D, C), (C, D), (C, C)]
60+
self.versus_test(
61+
opponent, expected_actions=actions, init_kwargs={"epsilon": 1.0}, seed=1
62+
)
5063

51-
# def versus_test(
52-
# self,
53-
# opponent,
54-
# expected_actions,
55-
# turns=None,
56-
# noise=None,
57-
# seed=None,
58-
# match_attributes=None,
59-
# attrs=None,
60-
# init_kwargs=None,
61-
# ):
62-
#
63-
# if init_kwargs is None:
64-
# init_kwargs = dict()
65-
#
66-
# player = self.player(**init_kwargs)
67-
#
68-
# test_match = TestMatch()
69-
# seed = test_match.search_seeds(
70-
# player,
71-
# opponent,
72-
# [x for (x, y) in expected_actions],
73-
# [y for (x, y) in expected_actions],
74-
# turns=turns,
75-
# noise=noise,
76-
# seed=seed,
77-
# attrs=attrs,
78-
# match_attributes=match_attributes,
79-
# )
80-
# self.assertIsNotNone(seed)
81-
# print(seed)
82-
#
64+
def test_strategy(self):
65+
# sometimes explores
66+
actions = [(C, C), (D, C), (D, C)]
67+
self.versus_test(
68+
axl.Cooperator(),
69+
expected_actions=actions,
70+
attrs={"_rewards": {C: 3, D: 5}},
71+
seed=21,
72+
)
73+
74+
# always explores
75+
actions = [(D, D), (C, D), (C, D)]
76+
self.versus_test(
77+
axl.Defector(),
78+
expected_actions=actions,
79+
attrs={"_rewards": {C: 0, D: 1}},
80+
seed=13741,
81+
)
82+
83+
# never explores/always exploits
84+
actions = [(C, C), (C, C), (C, C)]
85+
self.versus_test(
86+
axl.TitForTat(),
87+
expected_actions=actions,
88+
attrs={"_rewards": {C: 3, D: 0}},
89+
seed=1,
90+
)

0 commit comments

Comments
 (0)