1- import math
1+ import numpy as np
22
33from axelrod .action import Action
44from axelrod .player import Player
@@ -28,7 +28,7 @@ class Greedy(Player):
2828 "manipulates_state" : False ,
2929 }
3030
31- UNIFORM = float ( "- inf" ) # constant that replaces weight when rewards aren't weighted
31+ UNIFORM = np . inf # constant that replaces weight when rewards aren't weighted
3232
3333 def __init__ (
3434 self ,
@@ -53,9 +53,9 @@ def __init__(
5353 self .weight = recency_weight
5454
5555 # treat out of range values as extremes
56- if ( not math . isinf ( self .weight )) and ( self . weight <= 0 ) :
56+ if self .weight <= 0 :
5757 self .weight = 0.0
58- if recency_weight >= 1 :
58+ if ( not np . isinf ( self . weight )) and ( self . weight >= 1 ) :
5959 self .weight = 1.0
6060
6161 def update_rewards (self , opponent : Player ):
@@ -66,8 +66,8 @@ def update_rewards(self, opponent: Player):
6666 last_score = game .score (last_round )[0 ]
6767
6868 # if UNIFORM, use 1 / total number of times the updated action was taken previously
69- if math .isinf (self .weight ):
70- weight = self .history .cooperations if last_play == C else self .defections
69+ if np .isinf (self .weight ):
70+ weight = 1 / ( self .history .cooperations if last_play == C else self .history . defections )
7171 else :
7272 weight = self .weight
7373
@@ -109,6 +109,7 @@ def __init__(
109109 epsilon : float = 0.1 ,
110110 init_c_reward : float = 0.0 ,
111111 init_d_reward : float = 0.0 ,
112+ recency_weight : float = Greedy .UNIFORM
112113 ) -> None :
113114 """
114115 Parameters
@@ -126,7 +127,7 @@ def __init__(
126127 When epsilon <= 0, this player never explores and behaves like Greedy()
127128 When epsilon >= 1, this player always explores and behaves like Random(0.5)
128129 """
129- super ().__init__ (init_c_reward , init_d_reward )
130+ super ().__init__ (init_c_reward , init_d_reward , recency_weight )
130131 self .epsilon = epsilon
131132
132133 # treat out of range values as extremes
@@ -142,10 +143,12 @@ def _post_init(self):
142143
143144 def strategy (self , opponent : Player ) -> Action :
144145 """Actual strategy definition that determines player's action."""
146+ # this will also update the reward appropriately
147+ greedy_action = super ().strategy (opponent )
145148
146149 # explore
147- if self .epsilon > 0 and self ._random .uniform (0.0 , 1.0 ) <= self .epsilon :
150+ if self .epsilon > 0 and self ._random .uniform () <= self .epsilon :
148151 return self ._random .random_choice ()
149152 # exploit
150153 else :
151- return super (). strategy ( opponent )
154+ return greedy_action
0 commit comments