
Commit 3b43fef

bing-jmarcharper authored and committed
added and implemented epsilon_greedy.py
1 parent 38d2cb9 commit 3b43fef

File tree

1 file changed: +88 -0 lines changed

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
from axelrod.action import Action
from axelrod.player import Player

C, D = Action.C, Action.D


class EpsilonGreedy(Player):
    """
    Behaves greedily (chooses the optimal action) with a probability of 1 - epsilon,
    and chooses randomly between the actions with a probability of epsilon.

    The optimal action is determined from the average payoff of each action in previous turns.

    Names:

    # TODO: reference Sutton & Barto's Reinforcement Learning: An Introduction
    """

name = "$\varepsilon$-greedy"
20+
    classifier = {
        "memory_depth": float("inf"),
        "stochastic": True,
        "long_run_time": False,
        "inspects_source": False,
        "manipulates_source": False,
        "manipulates_state": False,
    }

    def __init__(
        self,
        epsilon: float = 0.1,
        init_c_reward: float = 0.0,
        init_d_reward: float = 0.0,
    ) -> None:
"""
36+
Parameters
37+
----------
38+
epsilon
39+
0.0 <= epsilon <= 1.0
40+
the probability that the player will "explore" (act uniformly random); defaults to 0.1
41+
init_c_reward
42+
initial expected utility from action C; defaults to 0.0.
43+
init_d_reward
44+
initial expected utility from action D; defaults to 0.0
45+
46+
Special cases
47+
----------
48+
epsilon = 0 is equal to Random(0.5)
49+
"""
        super().__init__()
        self.epsilon = epsilon

        # treat out of range values as extremes
        if epsilon <= 0:
            self.epsilon = 0.0
        if epsilon >= 1:
            self.epsilon = 1.0

        self._rewards = {C: init_c_reward, D: init_d_reward}

    def _post_init(self):
        super()._post_init()
        # a player that never explores plays deterministically
        if self.epsilon == 0:
            self.classifier["stochastic"] = False

    def update_rewards(self, opponent: Player):
        game = self.match_attributes["game"]
        last_round = (self.history[-1], opponent.history[-1])
        last_play = self.history[-1]
        last_score = game.score(last_round)[0]

        # update the expected reward of the action just played as an
        # incremental average of the scores it has earned so far
        num_plays = self.cooperations if last_play == C else self.defections
        self._rewards[last_play] = self._rewards[last_play] + (
            1 / num_plays
        ) * (last_score - self._rewards[last_play])

    def strategy(self, opponent: Player) -> Action:
        """Actual strategy definition that determines player's action."""
        # learn from the completed round (nothing to update on the first turn)
        if len(self.history) != 0:
            self.update_rewards(opponent)

        # explore
        if self._random.uniform(0.0, 1.0) <= self.epsilon:
            return self._random.random_choice()
        # exploit
        else:
            return max(self._rewards, key=self._rewards.get)
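
The assignment in update_rewards is the standard incremental-mean rule Q <- Q + (1/n) * (r - Q), so each action's reward estimate equals the average score that action has earned so far (a nonzero init_c_reward or init_d_reward is overwritten after the first play of that action). A minimal standalone sketch, independent of the Axelrod classes, checking that equivalence; the payoff list is made up for illustration:

# Check that the incremental rule used in update_rewards,
#     Q <- Q + (1 / n) * (r - Q),
# reproduces the running average of an action's scores.
def incremental_mean(scores):
    q = 0.0
    for n, r in enumerate(scores, start=1):
        q = q + (1 / n) * (r - q)
    return q

scores = [3, 0, 3, 5]  # hypothetical per-round payoffs for one action
assert abs(incremental_mean(scores) - sum(scores) / len(scores)) < 1e-12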

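For context, a hedged usage sketch running the new strategy in a short match. The import path is an assumption: this diff does not show where the file lives or whether the class is registered with the library's strategy index, so the module path (and the TitForTat opponent) are illustrative only.

import axelrod as axl
from axelrod.strategies.epsilon_greedy import EpsilonGreedy  # assumed location; not shown in this diff

players = (EpsilonGreedy(epsilon=0.1), axl.TitForTat())
match = axl.Match(players, turns=20)
match.play()
print(match.final_score())
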
0 commit comments
