Skip to content

Commit 4b635cf

Browse files
bing-jmarcharper
authored and committed
strategy tests completed and passed; updated docstring to clarify initial behaviour.
1 parent e650cf2 commit 4b635cf

File tree

2 files changed

+63
-53
lines changed

2 files changed

+63
-53
lines changed

axelrod/strategies/epsilon_greedy.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ class EpsilonGreedy(Player):
99
Behaves greedily (chooses the optimal action) with a probability of 1 - epsilon,
1010
and chooses randomly between the actions with a probability of epsilon.
1111
12-
The optimal action is determined from the average payoff of each action in previous turns.
12+
The optimal action is determined from the average payoff of each action in previous turns;
13+
if initial rewards for each action are equivalent (true by default),
14+
then the optimal action for the first turn is cooperate.
1315
1416
Names:
1517
@@ -45,7 +47,7 @@ def __init__(
4547
4648
Special cases
4749
----------
48-
epsilon = 0 is equal to Random(0.5)
50+
When epsilon <= 0, this player behaves like Random(0.5)
4951
"""
5052
super().__init__()
5153
self.epsilon = epsilon
@@ -75,9 +77,9 @@ def update_rewards(self, opponent: Player):
7577
else:
7678
num_plays = self.history.defections
7779

78-
self._rewards[last_play] = self._rewards[last_play] + (
79-
1 / num_plays
80-
) * (last_score - self._rewards[last_play])
80+
self._rewards[last_play] = self._rewards[last_play] + (1 / num_plays) * (
81+
last_score - self._rewards[last_play]
82+
)
8183

8284
def strategy(self, opponent: Player) -> Action:
8385
"""Actual strategy definition that determines player's action."""

axelrod/tests/strategies/test_epsilon_greedy.py

Lines changed: 56 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
class TestEpsilonGreedy(TestPlayer):
1111

12-
name = "$\varepsilon$-greedy"
12+
name = "$\varepsilon$-greedy: 0.1, 0.0, 0.0"
1313
player = axl.EpsilonGreedy
1414
expected_classifier = {
1515
"memory_depth": float("inf"),
@@ -24,59 +24,67 @@ class TestEpsilonGreedy(TestPlayer):
2424
def test_deterministic(self):
2525
# cases where epsilon = 0
2626
actions = [(C, C), (C, C), (C, C)]
27-
self.versus_test(axl.Cooperator(),
28-
expected_actions=actions,
29-
init_kwargs={"epsilon": 0, "init_c_reward": 0, "init_d_reward": -1},
30-
attrs={"_rewards": {C: 3, D: -1}})
27+
self.versus_test(
28+
axl.Cooperator(),
29+
expected_actions=actions,
30+
init_kwargs={"epsilon": 0, "init_c_reward": 0, "init_d_reward": -1},
31+
attrs={"_rewards": {C: 3, D: -1}},
32+
)
3133

3234
actions = [(D, D), (D, D), (D, D)]
33-
self.versus_test(axl.Defector(),
34-
expected_actions=actions,
35-
init_kwargs={"epsilon": 0, "init_c_reward": -1, "init_d_reward": 0},
36-
attrs={"_rewards": {C: -1, D: 1}})
35+
self.versus_test(
36+
axl.Defector(),
37+
expected_actions=actions,
38+
init_kwargs={"epsilon": 0, "init_c_reward": -1, "init_d_reward": 0},
39+
attrs={"_rewards": {C: -1, D: 1}},
40+
)
3741

38-
# actions = [(D, C), (D, D)]
39-
# self.versus_test(axl.TitForTat(),
40-
# expected_actions=actions,
41-
# init_kwargs={"epsilon": 0, "init_c_reward": 3.2, "init_d_reward": 4},
42-
# attrs={"_rewards": {C: 3.2, D: 9}})
42+
actions = [(D, C), (D, D), (C, D)]
43+
self.versus_test(
44+
axl.TitForTat(),
45+
expected_actions=actions,
46+
init_kwargs={"epsilon": 0, "init_c_reward": 3.2, "init_d_reward": 4.0},
47+
attrs={"_rewards": {C: 3.2, D: 3.0}},
48+
)
4349

4450
def test_random(self):
45-
# case where epsilon = 1
51+
# cases where epsilon = 1
4652
opponent = axl.MockPlayer()
4753
actions = [(C, C), (D, C), (D, C), (C, C)]
48-
self.versus_test(opponent, expected_actions=actions, init_kwargs={"epsilon": 1}, seed=5)
54+
self.versus_test(
55+
opponent, expected_actions=actions, init_kwargs={"epsilon": 1}, seed=5
56+
)
4957

58+
opponent = axl.MockPlayer(actions=[C, D, C])
59+
actions = [(D, C), (C, D), (C, C)]
60+
self.versus_test(
61+
opponent, expected_actions=actions, init_kwargs={"epsilon": 1.0}, seed=1
62+
)
5063

51-
# def versus_test(
52-
# self,
53-
# opponent,
54-
# expected_actions,
55-
# turns=None,
56-
# noise=None,
57-
# seed=None,
58-
# match_attributes=None,
59-
# attrs=None,
60-
# init_kwargs=None,
61-
# ):
62-
#
63-
# if init_kwargs is None:
64-
# init_kwargs = dict()
65-
#
66-
# player = self.player(**init_kwargs)
67-
#
68-
# test_match = TestMatch()
69-
# seed = test_match.search_seeds(
70-
# player,
71-
# opponent,
72-
# [x for (x, y) in expected_actions],
73-
# [y for (x, y) in expected_actions],
74-
# turns=turns,
75-
# noise=noise,
76-
# seed=seed,
77-
# attrs=attrs,
78-
# match_attributes=match_attributes,
79-
# )
80-
# self.assertIsNotNone(seed)
81-
# print(seed)
82-
#
64+
def test_strategy(self):
65+
# sometimes explores
66+
actions = [(C, C), (D, C), (D, C)]
67+
self.versus_test(
68+
axl.Cooperator(),
69+
expected_actions=actions,
70+
attrs={"_rewards": {C: 3, D: 5}},
71+
seed=21,
72+
)
73+
74+
# always explores
75+
actions = [(D, D), (C, D), (C, D)]
76+
self.versus_test(
77+
axl.Defector(),
78+
expected_actions=actions,
79+
attrs={"_rewards": {C: 0, D: 1}},
80+
seed=13741,
81+
)
82+
83+
# never explores/always exploits
84+
actions = [(C, C), (C, C), (C, C)]
85+
self.versus_test(
86+
axl.TitForTat(),
87+
expected_actions=actions,
88+
attrs={"_rewards": {C: 3, D: 0}},
89+
seed=1,
90+
)

0 commit comments

Comments
 (0)