
Commit 2a9dfaf

bing-jmarcharper authored and committed

for extendability: renamed module to armed_bandits, created Greedy superclass of EpsilonGreedy, and implemented option to weigh reward based on recency

1 parent 10de4c9 commit 2a9dfaf

File tree

6 files changed: +164 −98 lines changed

axelrod/data/all_classifiers.yml

Lines changed: 9 additions & 0 deletions

@@ -1999,3 +1999,12 @@ ZD-SET-2:
   manipulates_state: false
   memory_depth: 1
   stochastic: true
+greedy:
+  inspects_source: false
+  long_run_time: false
+  makes_use_of: !!set
+    game: null
+  manipulates_source: false
+  manipulates_state: false
+  memory_depth: .inf
+  stochastic: false

axelrod/strategies/_strategies.py

Lines changed: 2 additions & 1 deletion

@@ -90,7 +90,7 @@
 from .dbs import DBS
 from .defector import Defector, TrickyDefector
 from .doubler import Doubler
-from .epsilon_greedy import EpsilonGreedy
+from .armed_bandits import Greedy, EpsilonGreedy
 from .finite_state_machines import (
     TF1,
     TF2,
@@ -378,6 +378,7 @@
     Golden,
     Gradual,
     GradualKiller,
+    Greedy,
     Grudger,
     GrudgerAlternator,
     Grumpy,
axelrod/strategies/armed_bandits.py (new file)

Lines changed: 151 additions & 0 deletions

import math

from axelrod.action import Action
from axelrod.player import Player

C, D = Action.C, Action.D


class Greedy(Player):
    """
    A player that always chooses the action with the highest average reward
    over previous turns.

    If the initial rewards for each action are equal (true by default),
    then the optimal action for the first turn is to cooperate.

    Names:

    - Greedy: [Sutton2018]_
    """

    name = "greedy"
    classifier = {
        "memory_depth": float("inf"),
        "stochastic": False,
        "long_run_time": False,
        "inspects_source": False,
        "manipulates_source": False,
        "manipulates_state": False,
    }

    # sentinel replacing the weight when rewards are not recency-weighted
    UNIFORM = float("-inf")

    def __init__(
        self,
        init_c_reward: float = 0.0,
        init_d_reward: float = 0.0,
        recency_weight: float = UNIFORM,
    ) -> None:
        """
        Parameters
        ----------
        init_c_reward
            Initial expected utility from action C; defaults to 0.0.
        init_d_reward
            Initial expected utility from action D; defaults to 0.0.
        recency_weight
            0.0 <= recency_weight <= 1.0
            The exponential recency weight used in calculating the average
            reward. If this argument is not provided, the player will not
            weigh rewards based on recency.
        """
        super().__init__()
        self._rewards = {C: init_c_reward, D: init_d_reward}
        self.weight = recency_weight

        # treat out-of-range values as extremes
        if (not math.isinf(self.weight)) and (self.weight <= 0):
            self.weight = 0.0
        if self.weight >= 1:
            self.weight = 1.0

    def update_rewards(self, opponent: Player):
        """Updates the expected reward associated with the last action."""
        game = self.match_attributes["game"]
        last_round = (self.history[-1], opponent.history[-1])
        last_play = self.history[-1]
        last_score = game.score(last_round)[0]

        # if UNIFORM, use 1 / total number of times the updated action was taken
        if math.isinf(self.weight):
            weight = 1 / (
                self.history.cooperations
                if last_play == C
                else self.history.defections
            )
        else:
            weight = self.weight

        self._rewards[last_play] = self._rewards[last_play] + weight * (
            last_score - self._rewards[last_play]
        )

    def strategy(self, opponent: Player) -> Action:
        """Actual strategy definition that determines player's action."""
        # update the average rewards on every turn after the first
        if len(self.history) != 0:
            self.update_rewards(opponent)

        # select the play with the highest estimated reward
        return max(self._rewards, key=self._rewards.get)


class EpsilonGreedy(Greedy):
    """
    Behaves like Greedy() with probability 1 - epsilon, and plays randomly otherwise.

    Names:

    - Epsilon-greedy: [Sutton2018]_
    """

    name = r"$\varepsilon$-greedy"
    classifier = {
        "memory_depth": float("inf"),
        "stochastic": True,
        "long_run_time": False,
        "inspects_source": False,
        "manipulates_source": False,
        "manipulates_state": False,
    }

    def __init__(
        self,
        epsilon: float = 0.1,
        init_c_reward: float = 0.0,
        init_d_reward: float = 0.0,
    ) -> None:
        """
        Parameters
        ----------
        epsilon
            0.0 <= epsilon <= 1.0
            The probability that the player "explores" (acts uniformly at
            random); defaults to 0.1.
        init_c_reward
            Initial expected utility from action C; defaults to 0.0.
        init_d_reward
            Initial expected utility from action D; defaults to 0.0.

        Special cases
        -------------
        When epsilon <= 0, this player behaves like Greedy().
        When epsilon >= 1, this player behaves like Random(0.5).
        """
        super().__init__(init_c_reward, init_d_reward)
        self.epsilon = epsilon

        # treat out-of-range values as extremes
        if epsilon <= 0:
            self.epsilon = 0.0
        if epsilon >= 1:
            self.epsilon = 1.0

    def _post_init(self):
        super()._post_init()
        # with no exploration the player is deterministic
        if self.epsilon == 0:
            self.classifier["stochastic"] = False

    def strategy(self, opponent: Player) -> Action:
        """Actual strategy definition that determines player's action."""
        # explore: act uniformly at random with probability epsilon
        if self.epsilon > 0 and self._random.uniform(0.0, 1.0) <= self.epsilon:
            return self._random.random_choice()
        # exploit: fall back to the greedy choice
        else:
            return super().strategy(opponent)
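
Note on the update rule: both classes estimate per-action rewards with the incremental update Q(a) <- Q(a) + w * (score - Q(a)), where w is the fixed recency_weight (giving an exponential recency-weighted average) or, by default, 1 / n(a) for a plain sample average over the n(a) times action a was played. A minimal standalone sketch of that rule, with illustrative payoff numbers that are not taken from this commit:

def update(q: float, reward: float, n: int, recency_weight: float = None) -> float:
    """One incremental update of an action's estimated reward.

    With recency_weight=None this is the sample average (weight 1/n);
    otherwise it is the exponential recency-weighted average.
    """
    weight = (1 / n) if recency_weight is None else recency_weight
    return q + weight * (reward - q)

q = 0.0
for n, reward in enumerate([3, 3, 0], start=1):  # sample average
    q = update(q, reward, n)
print(q)  # 2.0, the mean of the three rewards

q = 0.0
for n, reward in enumerate([3, 3, 0], start=1):  # recency-weighted
    q = update(q, reward, n, recency_weight=0.5)
print(q)  # 1.125, pulled toward the most recent reward (0)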

axelrod/strategies/epsilon_greedy.py

Lines changed: 0 additions & 95 deletions
This file was deleted.

docs/reference/strategy_index.rst

Lines changed: 2 additions & 2 deletions

@@ -18,6 +18,8 @@ Here are the docstrings of all the strategies in the library.
    :members:
 .. automodule:: axelrod.strategies.appeaser
    :members:
+.. automodule:: axelrod.strategies.armed_bandits
+   :members:
 .. automodule:: axelrod.strategies.averagecopier
    :members:
 .. automodule:: axelrod.strategies.axelrod_first
@@ -48,8 +50,6 @@ Here are the docstrings of all the strategies in the library.
    :members:
 .. automodule:: axelrod.strategies.forgiver
    :members:
-.. automodule:: axelrod.strategies.epsilon_greedy
-   :members:
 .. automodule:: axelrod.strategies.gambler
    :members:
 .. automodule:: axelrod.strategies.gobymajority
