Modified UNIFORM to use np.inf instead of float("-inf"); fixed EpsilonGreedy.strategy() so that it always calls the parent strategy() method, and through it update_rewards(), on every turn, on explore as well as exploit turns; all previous tests pass.

bing-j · marcharper · commit 07430cb53dd4 · 2024-07-12T14:39:32.000-07:00
diff --git a/axelrod/strategies/armed_bandits.py b/axelrod/strategies/armed_bandits.py
@@ -1,4 +1,4 @@
-import math
+import numpy as np
 
 from axelrod.action import Action
 from axelrod.player import Player
@@ -28,7 +28,7 @@ class Greedy(Player):
         "manipulates_state": False,
     }
 
-    UNIFORM = float("-inf")  # constant that replaces weight when rewards aren't weighted
+    UNIFORM = np.inf  # constant that replaces weight when rewards aren't weighted
 
     def __init__(
         self,
@@ -53,9 +53,9 @@ def __init__(
         self.weight = recency_weight
 
         # treat out of range values as extremes
-        if (not math.isinf(self.weight)) and (self.weight <= 0):
+        if self.weight <= 0:
             self.weight = 0.0
-        if recency_weight >= 1:
+        if (not np.isinf(self.weight)) and (self.weight >= 1):
             self.weight = 1.0
 
     def update_rewards(self, opponent: Player):
@@ -66,8 +66,8 @@ def update_rewards(self, opponent: Player):
         last_score = game.score(last_round)[0]
 
         # if UNIFORM, use 1 / total number of times the updated action was taken previously
-        if math.isinf(self.weight):
-            weight = self.history.cooperations if last_play == C else self.defections
+        if np.isinf(self.weight):
+            weight = 1 / (self.history.cooperations if last_play == C else self.history.defections)
         else:
             weight = self.weight
 
@@ -109,6 +109,7 @@ def __init__(
         epsilon: float = 0.1,
         init_c_reward: float = 0.0,
         init_d_reward: float = 0.0,
+        recency_weight: float = Greedy.UNIFORM
     ) -> None:
         """
         Parameters
@@ -126,7 +127,7 @@ def __init__(
             When epsilon <= 0, this player behaves like Random(0.5)
             When epsilon >= 1, this player behaves like Greedy()
         """
-        super().__init__(init_c_reward, init_d_reward)
+        super().__init__(init_c_reward, init_d_reward, recency_weight)
         self.epsilon = epsilon
 
         # treat out of range values as extremes
@@ -142,10 +143,12 @@ def _post_init(self):
 
     def strategy(self, opponent: Player) -> Action:
         """Actual strategy definition that determines player's action."""
+        # this will also update the reward appropriately
+        greedy_action = super().strategy(opponent)
 
         # explore
-        if self.epsilon > 0 and self._random.uniform(0.0, 1.0) <= self.epsilon:
+        if self.epsilon > 0 and self._random.uniform() <= self.epsilon:
             return self._random.random_choice()
         # exploit
         else:
-            return super().strategy(opponent)
+            return greedy_action
diff --git a/axelrod/tests/strategies/test_armed_bandits.py b/axelrod/tests/strategies/test_armed_bandits.py
@@ -1,4 +1,4 @@
-"""Tests for the epsilon greedy strategy."""
+"""Tests for the armed bandits strategies."""
 
 import axelrod as axl
 
@@ -9,7 +9,7 @@
 
 class TestEpsilonGreedy(TestPlayer):
 
-    name = "$\varepsilon$-greedy: 0.1, 0.0, 0.0"
+    name = "$\varepsilon$-greedy: 0.1, 0.0, 0.0, inf"
     player = axl.EpsilonGreedy
     expected_classifier = {
         "memory_depth": float("inf"),
@@ -67,8 +67,9 @@ def test_strategy(self):
         self.versus_test(
             axl.Cooperator(),
             expected_actions=actions,
+            init_kwargs={"epsilon": 0.5},
             attrs={"_rewards": {C: 3, D: 5}},
-            seed=21,
+            seed=2,
         )
 
         # always explores
@@ -88,3 +89,35 @@ def test_strategy(self):
             attrs={"_rewards": {C: 3, D: 0}},
             seed=1,
         )
+
+    # temporary overriding function used to search for seeds
+    # def versus_test(
+    #     self,
+    #     opponent,
+    #     expected_actions,
+    #     turns=None,
+    #     noise=None,
+    #     seed=None,
+    #     match_attributes=None,
+    #     attrs=None,
+    #     init_kwargs=None,
+    # ):
+    #
+    #     if init_kwargs is None:
+    #         init_kwargs = dict()
+    #
+    #     player = self.player(**init_kwargs)
+    #
+    #     test_match = TestMatch()
+    #     seed = test_match.search_seeds(
+    #         player,
+    #         opponent,
+    #         [x for (x, y) in expected_actions],
+    #         [y for (x, y) in expected_actions],
+    #         turns=turns,
+    #         noise=noise,
+    #         seed=seed,
+    #         attrs=attrs,
+    #         match_attributes=match_attributes,
+    #     )
+    #     self.assertIsNotNone(seed)