99
1010class TestEpsilonGreedy (TestPlayer ):
1111
12- name = "$\varepsilon$-greedy"
12+ name = "$\varepsilon$-greedy: 0.1, 0.0, 0.0"
1313 player = axl .EpsilonGreedy
1414 expected_classifier = {
1515 "memory_depth" : float ("inf" ),
@@ -24,59 +24,67 @@ class TestEpsilonGreedy(TestPlayer):
2424 def test_deterministic (self ):
2525 # cases where epsilon = 0
2626 actions = [(C , C ), (C , C ), (C , C )]
27- self .versus_test (axl .Cooperator (),
28- expected_actions = actions ,
29- init_kwargs = {"epsilon" : 0 , "init_c_reward" : 0 , "init_d_reward" : - 1 },
30- attrs = {"_rewards" : {C : 3 , D : - 1 }})
27+ self .versus_test (
28+ axl .Cooperator (),
29+ expected_actions = actions ,
30+ init_kwargs = {"epsilon" : 0 , "init_c_reward" : 0 , "init_d_reward" : - 1 },
31+ attrs = {"_rewards" : {C : 3 , D : - 1 }},
32+ )
3133
3234 actions = [(D , D ), (D , D ), (D , D )]
33- self .versus_test (axl .Defector (),
34- expected_actions = actions ,
35- init_kwargs = {"epsilon" : 0 , "init_c_reward" : - 1 , "init_d_reward" : 0 },
36- attrs = {"_rewards" : {C : - 1 , D : 1 }})
35+ self .versus_test (
36+ axl .Defector (),
37+ expected_actions = actions ,
38+ init_kwargs = {"epsilon" : 0 , "init_c_reward" : - 1 , "init_d_reward" : 0 },
39+ attrs = {"_rewards" : {C : - 1 , D : 1 }},
40+ )
3741
38- # actions = [(D, C), (D, D)]
39- # self.versus_test(axl.TitForTat(),
40- # expected_actions=actions,
41- # init_kwargs={"epsilon": 0, "init_c_reward": 3.2, "init_d_reward": 4},
42- # attrs={"_rewards": {C: 3.2, D: 9}})
42+ actions = [(D , C ), (D , D ), (C , D )]
43+ self .versus_test (
44+ axl .TitForTat (),
45+ expected_actions = actions ,
46+ init_kwargs = {"epsilon" : 0 , "init_c_reward" : 3.2 , "init_d_reward" : 4.0 },
47+ attrs = {"_rewards" : {C : 3.2 , D : 3.0 }},
48+ )
4349
4450 def test_random (self ):
45- # case where epsilon = 1
51+ # cases where epsilon = 1
4652 opponent = axl .MockPlayer ()
4753 actions = [(C , C ), (D , C ), (D , C ), (C , C )]
48- self .versus_test (opponent , expected_actions = actions , init_kwargs = {"epsilon" : 1 }, seed = 5 )
54+ self .versus_test (
55+ opponent , expected_actions = actions , init_kwargs = {"epsilon" : 1 }, seed = 5
56+ )
4957
58+ opponent = axl .MockPlayer (actions = [C , D , C ])
59+ actions = [(D , C ), (C , D ), (C , C )]
60+ self .versus_test (
61+ opponent , expected_actions = actions , init_kwargs = {"epsilon" : 1.0 }, seed = 1
62+ )
5063
51- # def versus_test(
52- # self,
53- # opponent,
54- # expected_actions,
55- # turns=None,
56- # noise=None,
57- # seed=None,
58- # match_attributes=None,
59- # attrs=None,
60- # init_kwargs=None,
61- # ):
62- #
63- # if init_kwargs is None:
64- # init_kwargs = dict()
65- #
66- # player = self.player(**init_kwargs)
67- #
68- # test_match = TestMatch()
69- # seed = test_match.search_seeds(
70- # player,
71- # opponent,
72- # [x for (x, y) in expected_actions],
73- # [y for (x, y) in expected_actions],
74- # turns=turns,
75- # noise=noise,
76- # seed=seed,
77- # attrs=attrs,
78- # match_attributes=match_attributes,
79- # )
80- # self.assertIsNotNone(seed)
81- # print(seed)
82- #
64+ def test_strategy (self ):
65+ # sometimes explores
66+ actions = [(C , C ), (D , C ), (D , C )]
67+ self .versus_test (
68+ axl .Cooperator (),
69+ expected_actions = actions ,
70+ attrs = {"_rewards" : {C : 3 , D : 5 }},
71+ seed = 21 ,
72+ )
73+
74+ # always explores
75+ actions = [(D , D ), (C , D ), (C , D )]
76+ self .versus_test (
77+ axl .Defector (),
78+ expected_actions = actions ,
79+ attrs = {"_rewards" : {C : 0 , D : 1 }},
80+ seed = 13741 ,
81+ )
82+
83+ # never explores/always exploits
84+ actions = [(C , C ), (C , C ), (C , C )]
85+ self .versus_test (
86+ axl .TitForTat (),
87+ expected_actions = actions ,
88+ attrs = {"_rewards" : {C : 3 , D : 0 }},
89+ seed = 1 ,
90+ )
0 commit comments