Skip to content

Commit 07430cb

Browse files
bing-jmarcharper
authored and committed
Modified UNIFORM to use np.inf instead of float("-inf"); fixed EpsilonGreedy.strategy() so that it correctly calls update_rewards() through the parent method; all previous tests passed.
1 parent 2a9dfaf commit 07430cb

File tree

2 files changed

+48
-12
lines changed

2 files changed

+48
-12
lines changed

axelrod/strategies/armed_bandits.py

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import math
1+
import numpy as np
22

33
from axelrod.action import Action
44
from axelrod.player import Player
@@ -28,7 +28,7 @@ class Greedy(Player):
2828
"manipulates_state": False,
2929
}
3030

31-
UNIFORM = float("-inf") # constant that replaces weight when rewards aren't weighted
31+
UNIFORM = np.inf # constant that replaces weight when rewards aren't weighted
3232

3333
def __init__(
3434
self,
@@ -53,9 +53,9 @@ def __init__(
5353
self.weight = recency_weight
5454

5555
# treat out of range values as extremes
56-
if (not math.isinf(self.weight)) and (self.weight <= 0):
56+
if self.weight <= 0:
5757
self.weight = 0.0
58-
if recency_weight >= 1:
58+
if (not np.isinf(self.weight)) and (self.weight >= 1):
5959
self.weight = 1.0
6060

6161
def update_rewards(self, opponent: Player):
@@ -66,8 +66,8 @@ def update_rewards(self, opponent: Player):
6666
last_score = game.score(last_round)[0]
6767

6868
# if UNIFORM, use 1 / total number of times the updated action was taken previously
69-
if math.isinf(self.weight):
70-
weight = self.history.cooperations if last_play == C else self.defections
69+
if np.isinf(self.weight):
70+
weight = 1 / (self.history.cooperations if last_play == C else self.history.defections)
7171
else:
7272
weight = self.weight
7373

@@ -109,6 +109,7 @@ def __init__(
109109
epsilon: float = 0.1,
110110
init_c_reward: float = 0.0,
111111
init_d_reward: float = 0.0,
112+
recency_weight: float = Greedy.UNIFORM
112113
) -> None:
113114
"""
114115
Parameters
@@ -126,7 +127,7 @@ def __init__(
126127
When epsilon <= 0, this player behaves like Random(0.5)
127128
When epsilon >= 1, this player behaves like Greedy()
128129
"""
129-
super().__init__(init_c_reward, init_d_reward)
130+
super().__init__(init_c_reward, init_d_reward, recency_weight)
130131
self.epsilon = epsilon
131132

132133
# treat out of range values as extremes
@@ -142,10 +143,12 @@ def _post_init(self):
142143

143144
def strategy(self, opponent: Player) -> Action:
144145
"""Actual strategy definition that determines player's action."""
146+
# this will also update the reward appropriately
147+
greedy_action = super().strategy(opponent)
145148

146149
# explore
147-
if self.epsilon > 0 and self._random.uniform(0.0, 1.0) <= self.epsilon:
150+
if self.epsilon > 0 and self._random.uniform() <= self.epsilon:
148151
return self._random.random_choice()
149152
# exploit
150153
else:
151-
return super().strategy(opponent)
154+
return greedy_action

axelrod/tests/strategies/test_armed_bandits.py

Lines changed: 36 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
"""Tests for the epsilon greedy strategy."""
1+
"""Tests for the armed bandits strategies."""
22

33
import axelrod as axl
44

@@ -9,7 +9,7 @@
99

1010
class TestEpsilonGreedy(TestPlayer):
1111

12-
name = "$\varepsilon$-greedy: 0.1, 0.0, 0.0"
12+
name = "$\varepsilon$-greedy: 0.1, 0.0, 0.0, inf"
1313
player = axl.EpsilonGreedy
1414
expected_classifier = {
1515
"memory_depth": float("inf"),
@@ -67,8 +67,9 @@ def test_strategy(self):
6767
self.versus_test(
6868
axl.Cooperator(),
6969
expected_actions=actions,
70+
init_kwargs={"epsilon": 0.5},
7071
attrs={"_rewards": {C: 3, D: 5}},
71-
seed=21,
72+
seed=2,
7273
)
7374

7475
# always explores
@@ -88,3 +89,35 @@ def test_strategy(self):
8889
attrs={"_rewards": {C: 3, D: 0}},
8990
seed=1,
9091
)
92+
93+
# temporary overriding function used to search for seeds
94+
# def versus_test(
95+
# self,
96+
# opponent,
97+
# expected_actions,
98+
# turns=None,
99+
# noise=None,
100+
# seed=None,
101+
# match_attributes=None,
102+
# attrs=None,
103+
# init_kwargs=None,
104+
# ):
105+
#
106+
# if init_kwargs is None:
107+
# init_kwargs = dict()
108+
#
109+
# player = self.player(**init_kwargs)
110+
#
111+
# test_match = TestMatch()
112+
# seed = test_match.search_seeds(
113+
# player,
114+
# opponent,
115+
# [x for (x, y) in expected_actions],
116+
# [y for (x, y) in expected_actions],
117+
# turns=turns,
118+
# noise=noise,
119+
# seed=seed,
120+
# attrs=attrs,
121+
# match_attributes=match_attributes,
122+
# )
123+
# self.assertIsNotNone(seed)

0 commit comments

Comments
 (0)