From e58062903a923bb31d0240edbae6ea743747ff48 Mon Sep 17 00:00:00 2001 From: Manan Khattar Date: Tue, 8 Dec 2020 22:10:08 -0800 Subject: [PATCH 1/6] 12/8 work --- multiagent/SG_environment.py | 365 ++++++++++++++++++++++++++++ multiagent/environment.py | 127 ++++++---- multiagent/scenarios/simple_push.py | 45 +++- 3 files changed, 479 insertions(+), 58 deletions(-) create mode 100644 multiagent/SG_environment.py diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py new file mode 100644 index 000000000..f7d09d581 --- /dev/null +++ b/multiagent/SG_environment.py @@ -0,0 +1,365 @@ +import gym +from gym import spaces +from gym.envs.registration import EnvSpec +import numpy as np +from multiagent.multi_discrete import MultiDiscrete + +import numpy as np +import random + +import tensorflow as tf + +from gym_socialgame.envs.utils import price_signal, fourier_points_from_action +from gym_socialgame.envs.agents import * +from gym_socialgame.envs.reward import Reward + +import pickle + +# environment for all agents in the multiagent world +# currently code assumes that no agents will be created/destroyed at runtime! +class MultiAgentEnv(gym.Env): + metadata = { + 'render.modes' : ['human', 'rgb_array'] # ?? necc? + } + + def __init__( + self, + action_space_string = "continuous", + response_type_string = "l", + number_of_participants = 10, + one_day = 0, + energy_in_state = False, + yesterday_in_state = False, + day_of_week = False, + pricing_type="TOU", + reward_function = "scaled_cost_distance", + fourier_basis_size = 10, + ): + + # self.world = world ## does world appear elsewhere + + player_dict = self._create_agents() + self.agents = player_dict # or transform however needed # world.policy_agents + self.n = len(self.agents) + + self.discrete_action_space = False + self.discrete_action_input = False + self.force_discrete_action = False + + self.shared_reward = False ## right? 
+ self.time = 0 + + self.points_length = 10 + self.action_length = 10 + self.action_subspace = 3 + + self.action_space = [] + + + # (our inputs) + self.prev_energy = np.zeros(10) + self.one_day = self._find_one_day(one_day) + self.energy_in_state = energy_in_state + self.yesterday_in_state = yesterday_in_state + self.reward_function = reward_function + self.fourier_basis_size = fourier_basis_size + + self.day = 0 + self.days_of_week = [0, 1, 2, 3, 4] + self.day_of_week_flag = day_of_week + self.day_of_week = self.days_of_week[self.day % 5] + + #Create Observation Space (aka State Space) + self.observation_space = [] + + if pricing_type=="TOU": + self.pricing_type = "time_of_use" + elif pricing_type == "RTP": + self.pricing_type = "real_time_pricing" + else: + print("Wrong pricing type") + raise ValueError + + self.prices = self._get_prices() + + ## back to filling in their skeleton + + for agent in self.agents: + # action seq isn't continuous, so skipping Multidiscrete action + self.action_space.append(self._create_action_space()) + + obs_dim = 10 + 10 * self.energy_in_state + 10 * self.yesterday_in_state + self.observation_space.append(self._create_observation_space()) + + + def _find_one_day(self, one_day: int): + """ + Purpose: Helper function to find one_day to train on (if applicable) + + Args: + One_day: (Int) in range [-1,365] + + Returns: + 0 if one_day = 0 + one_day if one_day in range [1,365] + random_number(1,365) if one_day = -1 + """ + + print("one_day") + print(one_day) + + # if(one_day != -1): + # return np.random.randint(0, high=365) + + # else: + return one_day + + + def _create_action_space(self): + """ + Purpose: Return action space of type specified by self.action_space_string + + Args: + None + + Returns: + Action Space for environment based on action_space_str + + Note: Multidiscrete refers to a 10-dim vector where each action {0,1,2} represents Low, Medium, High points respectively. + We pose this option to test whether simplifying the action-space helps the agent. + """ + + #Making a symmetric, continuous space to help learning for continuous control (suggested in StableBaselines doc.) + if self.action_space_string == "continuous": + return spaces.Box(low=-1, high=1, shape=(self.action_length,), dtype=np.float32) + + elif self.action_space_string == "multidiscrete": + discrete_space = [self.action_subspace] * self.action_length + return spaces.MultiDiscrete(discrete_space) + + elif self.action_space_string == "fourier": + return spaces.Box( + low=-2, high=2, shape=(2*self.fourier_basis_size - 1,), dtype=np.float32 + ) + + + def _create_agents(self): + """ + Purpose: Create the participants of the social game. We create a game with n players, where n = self.number_of_participants + + Args: + None + + Returns: + agent_dict: Dictionary of players, each with response function based on self.response_type_string + + """ + + player_dict = {} + + # Sample Energy from average energy in the office (pre-treatment) from the last experiment + # Reference: Lucas Spangher, et al. Engineering vs. ambient typevisualizations: Quantifying effects of different data visualizations on energy consumption. 
2019 + + sample_energy = np.array([ 0.28, 11.9, 16.34, 16.8, 17.43, 16.15, 16.23, 15.88, 15.09, 35.6, + 123.5, 148.7, 158.49, 149.13, 159.32, 157.62, 158.8, 156.49, 147.04, 70.76, + 42.87, 23.13, 22.52, 16.8 ]) + + #only grab working hours (8am - 5pm) + working_hour_energy = sample_energy[8:18] + + my_baseline_energy = pd.DataFrame(data = {"net_energy_use" : working_hour_energy}) + + for i in range(self.number_of_participants): + player = DeterministicFunctionPerson(my_baseline_energy, points_multiplier = 10, response= self.response_type_string) #, ) + ## TODO: Deterministic Function needs to be modified to set a flag on each agent to silent + ## TODO: add a prev_energy to keep track of their yesterday's energy + + player_dict['player_{}'.format(i)] = player + + return player_dict + +def _create_observation_space(self): + """ + Purpose: Returns the observation space. + If the state space includes yesterday, then it is +10 dim for yesterday's price signal + If the state space includes energy_in_state, then it is +10 dim for yesterday's energy + + Args: + None + + Returns: + Action Space for environment based on action_space_str + """ + + #TODO: Normalize obs_space ! + if(self.yesterday_in_state): + if(self.energy_in_state): + return spaces.Box(low=-np.inf, high=np.inf, shape=(30,), dtype=np.float32) + else: + return spaces.Box(low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32) + + else: + if self.energy_in_state: + return spaces.Box(low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32) + else: + return spaces.Box(low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32) + +def _get_prices(self): + """ + Purpose: Get grid price signals for the entire year (using past data from a building in Los Angeles as reference) + + Args: + None + + Returns: Array containing 365 price signals, where array[day_number] = grid_price for day_number from 8AM - 5PM + + """ + all_prices = [] + print("--" * 10) + print(self.one_day) + print("--" * 10) + + type_of_DR = self.pricing_type + + if self.one_day != -1: + # If one_day we repeat the price signals from a fixed day + # Tweak One_Day Price Signal HERE + price = price_signal(self.one_day, type_of_DR=type_of_DR) + price = np.array(price[8:18]) + if np.mean(price)==price[2]: + price[3:6]+=.3 + price = np.maximum(0.01 * np.ones_like(price), price) + + for i in range(365): + all_prices.append(price) + else: + day = 0 + for i in range(365): + price = price_signal(day + 1, type_of_DR=type_of_DR) + price = np.array(price[8:18]) + # put a floor on the prices so we don't have negative prices + if np.mean(price)==price[2]: + price[3:6]+=.3 + price = np.maximum(0.01 * np.ones_like(price), price) + all_prices.append(price) + day += 1 + + return np.array(all_prices) + +def _simulate_human(self, action, agent): + """ + Purpose: Gets energy consumption from players given action from agent + + Args: + Action: 10-dim vector corresponding to action for each hour 8AM - 5PM + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ + + #Get players response to agent's actions + player = agent + + if (self.day_of_week_flag): + player_energy = player.get_response(action, day_of_week = self.day_of_week) + else: + player_energy = player.get_response(action, day_of_week = None) + + #Calculate energy consumption by player and in total (over the office) + + return player_energy + + +def step(self, action_n): + obs_n = [] + reward_n = [] + done_n = [] + info_n = {"n": []} + + for i, agent in 
enumerate(self.agents): + agent.action = action_n[i] + + prev_price = self.prices[(self.day)] + self.day = (self.day + 1) % 365 + self.curr_iter += 1 + + if self.curr_iter > 0: + done = True + self.curr_iter = 0 + else: + done = False + + for agent in self.agents: + obs_n.append(self._get_observation_per_agent(agent)) + reward_n.append(self._get_reward_per_agent(agent)) + done_n.append(self._get_done(agent)) + + info_n["n"].append(self._get_info(agent)) + + reward = np.sum(reward_n) + return obs_n, reward_n, done_n, info_n + +def _get_observation_per_agent(self, agent): + prev_price = self.prices[ (self.day - 1) % 365] + next_observation = self.prices[self.day] + + if(self.yesterday_in_state): + if self.energy_in_state: + return np.concatenate((next_observation, np.concatenate((prev_price, self.prev_energy)))) + else: + return np.concatenate((next_observation, prev_price)) + + elif self.energy_in_state: + return np.concatenate((next_observation, self.prev_energy)) + + else: + return next_observation + +def _get_reward_per_agent(self, agent, price, energy_consumptions, reward_function = "scaled_cost_distance"): + # TODO: finish massaging this + """ + Purpose: Compute reward given price signal and energy consumption of the office + + Args: + Price: Price signal vector (10-dim) + Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ + + total_reward = 0 + for player_name in energy_consumptions: + if player_name != "avg": + # get the points output from players + player = self.player_dict[player_name] + + # get the reward from the player's output + player_min_demand = player.get_min_demand() + player_max_demand = player.get_max_demand() + player_energy = energy_consumptions[player_name] + player_reward = Reward(player_energy, price, player_min_demand, player_max_demand) + + if reward_function == "scaled_cost_distance": + player_ideal_demands = player_reward.ideal_use_calculation() + reward = player_reward.scaled_cost_distance(player_ideal_demands) + + elif reward_function == "log_cost_regularized": + reward = player_reward.log_cost_regularized() + + total_reward += reward + + return total_reward + + def reset(self): + # reset world + obs_n = [] + for agent in self.agents: + obs_n.append(self._get_observation_per_agent(agent)) + return obs_n + + def render(self, mode='human'): + pass + + def close(self): + pass \ No newline at end of file diff --git a/multiagent/environment.py b/multiagent/environment.py index d2e8d3278..18bc3c996 100644 --- a/multiagent/environment.py +++ b/multiagent/environment.py @@ -7,13 +7,18 @@ # environment for all agents in the multiagent world # currently code assumes that no agents will be created/destroyed at runtime! 
class MultiAgentEnv(gym.Env): - metadata = { - 'render.modes' : ['human', 'rgb_array'] - } - - def __init__(self, world, reset_callback=None, reward_callback=None, - observation_callback=None, info_callback=None, - done_callback=None, shared_viewer=True): + metadata = {"render.modes": ["human", "rgb_array"]} + + def __init__( + self, + world, + reset_callback=None, + reward_callback=None, + observation_callback=None, + info_callback=None, + done_callback=None, + shared_viewer=True, + ): self.world = world self.agents = self.world.policy_agents @@ -30,9 +35,13 @@ def __init__(self, world, reset_callback=None, reward_callback=None, # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector self.discrete_action_input = False # if true, even the action is continuous, action will be performed discretely - self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False + self.force_discrete_action = ( + world.discrete_action if hasattr(world, "discrete_action") else False + ) # if true, every agent has the same reward - self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False + self.shared_reward = ( + world.collaborative if hasattr(world, "collaborative") else False + ) self.time = 0 # configure spaces @@ -44,21 +53,35 @@ def __init__(self, world, reset_callback=None, reward_callback=None, if self.discrete_action_space: u_action_space = spaces.Discrete(world.dim_p * 2 + 1) else: - u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32) + u_action_space = spaces.Box( + low=-agent.u_range, + high=+agent.u_range, + shape=(world.dim_p,), + dtype=np.float32, + ) if agent.movable: total_action_space.append(u_action_space) # communication action space if self.discrete_action_space: c_action_space = spaces.Discrete(world.dim_c) else: - c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32) + c_action_space = spaces.Box( + low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32 + ) if not agent.silent: total_action_space.append(c_action_space) # total action space if len(total_action_space) > 1: # all action spaces are discrete, so simplify to MultiDiscrete action space - if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): - act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space]) + if all( + [ + isinstance(act_space, spaces.Discrete) + for act_space in total_action_space + ] + ): + act_space = MultiDiscrete( + [[0, act_space.n - 1] for act_space in total_action_space] + ) else: act_space = spaces.Tuple(total_action_space) self.action_space.append(act_space) @@ -66,7 +89,11 @@ def __init__(self, world, reset_callback=None, reward_callback=None, self.action_space.append(total_action_space[0]) # observation space obs_dim = len(observation_callback(agent, self.world)) - self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)) + self.observation_space.append( + spaces.Box( + low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32 + ) + ) agent.action.c = np.zeros(self.world.dim_c) # rendering @@ -81,7 +108,7 @@ def step(self, action_n): obs_n = [] reward_n = [] done_n = [] - info_n = {'n': []} + info_n = {"n": []} self.agents = self.world.policy_agents # set action for each agent for i, agent in enumerate(self.agents): @@ -94,7 +121,7 @@ def step(self, action_n): reward_n.append(self._get_reward(agent)) 
done_n.append(self._get_done(agent)) - info_n['n'].append(self._get_info(agent)) + info_n["n"].append(self._get_info(agent)) # all agents get total reward in cooperative case reward = np.sum(reward_n) @@ -150,7 +177,7 @@ def _set_action(self, action, agent, action_space, time=None): size = action_space.high - action_space.low + 1 index = 0 for s in size: - act.append(action[index:(index+s)]) + act.append(action[index : (index + s)]) index += s action = act else: @@ -161,10 +188,14 @@ def _set_action(self, action, agent, action_space, time=None): if self.discrete_action_input: agent.action.u = np.zeros(self.world.dim_p) # process discrete action - if action[0] == 1: agent.action.u[0] = -1.0 - if action[0] == 2: agent.action.u[0] = +1.0 - if action[0] == 3: agent.action.u[1] = -1.0 - if action[0] == 4: agent.action.u[1] = +1.0 + if action[0] == 1: + agent.action.u[0] = -1.0 + if action[0] == 2: + agent.action.u[0] = +1.0 + if action[0] == 3: + agent.action.u[1] = -1.0 + if action[0] == 4: + agent.action.u[1] = +1.0 else: if self.force_discrete_action: d = np.argmax(action[0]) @@ -197,40 +228,43 @@ def _reset_render(self): self.render_geoms_xform = None # render environment - def render(self, mode='human'): - if mode == 'human': - alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - message = '' + def render(self, mode="human"): + if mode == "human": + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + message = "" for agent in self.world.agents: comm = [] for other in self.world.agents: - if other is agent: continue + if other is agent: + continue if np.all(other.state.c == 0): - word = '_' + word = "_" else: word = alphabet[np.argmax(other.state.c)] - message += (other.name + ' to ' + agent.name + ': ' + word + ' ') + message += other.name + " to " + agent.name + ": " + word + " " print(message) for i in range(len(self.viewers)): # create viewers (if necessary) if self.viewers[i] is None: # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering + # from gym.envs.classic_control import rendering from multiagent import rendering - self.viewers[i] = rendering.Viewer(700,700) + + self.viewers[i] = rendering.Viewer(700, 700) # create rendering geometry if self.render_geoms is None: # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering + # from gym.envs.classic_control import rendering from multiagent import rendering + self.render_geoms = [] self.render_geoms_xform = [] for entity in self.world.entities: geom = rendering.make_circle(entity.size) xform = rendering.Transform() - if 'agent' in entity.name: + if "agent" in entity.name: geom.set_color(*entity.color, alpha=0.5) else: geom.set_color(*entity.color) @@ -247,49 +281,52 @@ def render(self, mode='human'): results = [] for i in range(len(self.viewers)): from multiagent import rendering + # update bounds to center around agent cam_range = 1 if self.shared_viewer: pos = np.zeros(self.world.dim_p) else: pos = self.agents[i].state.p_pos - self.viewers[i].set_bounds(pos[0]-cam_range,pos[0]+cam_range,pos[1]-cam_range,pos[1]+cam_range) + self.viewers[i].set_bounds( + pos[0] - cam_range, + pos[0] + cam_range, + pos[1] - cam_range, + pos[1] + cam_range, + ) # update geometry positions for e, entity in enumerate(self.world.entities): self.render_geoms_xform[e].set_translation(*entity.state.p_pos) # render to display or array - results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) + 
results.append(self.viewers[i].render(return_rgb_array=mode == "rgb_array")) return results # create receptor field locations in local coordinate frame def _make_receptor_locations(self, agent): - receptor_type = 'polar' + receptor_type = "polar" range_min = 0.05 * 2.0 range_max = 1.00 dx = [] # circular receptive field - if receptor_type == 'polar': + if receptor_type == "polar": for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False): for distance in np.linspace(range_min, range_max, 3): dx.append(distance * np.array([np.cos(angle), np.sin(angle)])) # add origin dx.append(np.array([0.0, 0.0])) # grid receptive field - if receptor_type == 'grid': + if receptor_type == "grid": for x in np.linspace(-range_max, +range_max, 5): for y in np.linspace(-range_max, +range_max, 5): - dx.append(np.array([x,y])) + dx.append(np.array([x, y])) return dx # vectorized wrapper for a batch of multi-agent environments # assumes all environments have the same observation and action space class BatchMultiAgentEnv(gym.Env): - metadata = { - 'runtime.vectorized': True, - 'render.modes' : ['human', 'rgb_array'] - } + metadata = {"runtime.vectorized": True, "render.modes": ["human", "rgb_array"]} def __init__(self, env_batch): self.env_batch = env_batch @@ -310,10 +347,10 @@ def step(self, action_n, time): obs_n = [] reward_n = [] done_n = [] - info_n = {'n': []} + info_n = {"n": []} i = 0 for env in self.env_batch: - obs, reward, done, _ = env.step(action_n[i:(i+env.n)], time) + obs, reward, done, _ = env.step(action_n[i : (i + env.n)], time) i += env.n obs_n += obs # reward = [r / len(self.env_batch) for r in reward] @@ -328,7 +365,7 @@ def reset(self): return obs_n # render environment - def render(self, mode='human', close=True): + def render(self, mode="human", close=True): results_n = [] for env in self.env_batch: results_n += env.render(mode, close) diff --git a/multiagent/scenarios/simple_push.py b/multiagent/scenarios/simple_push.py index 907fef5b5..733535860 100644 --- a/multiagent/scenarios/simple_push.py +++ b/multiagent/scenarios/simple_push.py @@ -2,18 +2,19 @@ from multiagent.core import World, Agent, Landmark from multiagent.scenario import BaseScenario + class Scenario(BaseScenario): def make_world(self): world = World() # set any world properties first world.dim_c = 2 - num_agents = 2 + num_agents = 2 # same as dim_c num_adversaries = 1 num_landmarks = 2 # add agents world.agents = [Agent() for i in range(num_agents)] for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i + agent.name = "agent %d" % i agent.collide = True agent.silent = True if i < num_adversaries: @@ -23,7 +24,7 @@ def make_world(self): # add landmarks world.landmarks = [Landmark() for i in range(num_landmarks)] for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i + landmark.name = "landmark %d" % i landmark.collide = False landmark.movable = False # make initial conditions @@ -57,7 +58,11 @@ def reset_world(self, world): def reward(self, agent, world): # Agents are rewarded based on minimum agent distance to each landmark - return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) + return ( + self.adversary_reward(agent, world) + if agent.adversary + else self.agent_reward(agent, world) + ) def agent_reward(self, agent, world): # the distance to the goal @@ -65,14 +70,20 @@ def agent_reward(self, agent, world): def adversary_reward(self, agent, world): # keep the nearest good agents away from the goal - agent_dist = 
[np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary] + agent_dist = [ + np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) + for a in world.agents + if not a.adversary + ] pos_rew = min(agent_dist) - #nearest_agent = world.good_agents[np.argmin(agent_dist)] - #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) - neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos))) - #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) + # nearest_agent = world.good_agents[np.argmin(agent_dist)] + # neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) + neg_rew = np.sqrt( + np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos)) + ) + # neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) return pos_rew - neg_rew - + def observation(self, agent, world): # get positions of all entities in this agent's reference frame entity_pos = [] @@ -86,11 +97,19 @@ def observation(self, agent, world): comm = [] other_pos = [] for other in world.agents: - if other is agent: continue + if other is agent: + continue comm.append(other.state.c) other_pos.append(other.state.p_pos - agent.state.p_pos) if not agent.adversary: - return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos) + return np.concatenate( + [agent.state.p_vel] + + [agent.goal_a.state.p_pos - agent.state.p_pos] + + [agent.color] + + entity_pos + + entity_color + + other_pos + ) else: - #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network + # other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network return np.concatenate([agent.state.p_vel] + entity_pos + other_pos) From 2d36a90cb41cfc23da2d86fcd433e8df59fdca66 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 9 Dec 2020 01:10:08 -0800 Subject: [PATCH 2/6] modified obs and rew funcs agent-wise, cleaned up --- multiagent/SG_environment.py | 73 ++++---- multiagent/agents.py | 328 +++++++++++++++++++++++++++++++++++ multiagent/reward.py | 202 +++++++++++++++++++++ multiagent/utils.py | 115 ++++++++++++ 4 files changed, 674 insertions(+), 44 deletions(-) create mode 100644 multiagent/agents.py create mode 100644 multiagent/reward.py create mode 100644 multiagent/utils.py diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py index f7d09d581..a8d010b2f 100644 --- a/multiagent/SG_environment.py +++ b/multiagent/SG_environment.py @@ -171,9 +171,6 @@ def _create_agents(self): for i in range(self.number_of_participants): player = DeterministicFunctionPerson(my_baseline_energy, points_multiplier = 10, response= self.response_type_string) #, ) - ## TODO: Deterministic Function needs to be modified to set a flag on each agent to silent - ## TODO: add a prev_energy to keep track of their yesterday's energy - player_dict['player_{}'.format(i)] = player return player_dict @@ -275,28 +272,24 @@ def step(self, action_n): reward_n = [] done_n = [] info_n = {"n": []} - - for i, agent in enumerate(self.agents): - agent.action = action_n[i] prev_price = self.prices[(self.day)] - self.day = (self.day + 1) % 365 - self.curr_iter += 1 - - if self.curr_iter > 0: - done = True - 
self.curr_iter = 0
-        else:
-            done = False
-
-        for agent in self.agents:
-            obs_n.append(self._get_observation_per_agent(agent))
-            reward_n.append(self._get_reward_per_agent(agent))
-            done_n.append(self._get_done(agent))
+        self.day = (self.day + 1) % 365
+        self.curr_iter += 1
+
+        if self.curr_iter > 0:
+            done = True
+            self.curr_iter = 0
+        else:
+            done = False
 
-            info_n["n"].append(self._get_info(agent))
+        for i, agent in enumerate(self.agents):
+            agent.prev_energy = self._simulate_human(action_n[i], agent)
+            obs_n.append(self._get_observation_per_agent(agent))
+            reward_n.append(self._get_reward_per_agent(agent))
+            done_n.append(done)
+            info_n["n"].append({})  ## kept as a formality, mirroring the Stable Baselines interface
 
-        reward = np.sum(reward_n)
         return obs_n, reward_n, done_n, info_n
 
@@ -305,49 +298,41 @@ def _get_observation_per_agent(self, agent):
     prev_price = self.prices[ (self.day - 1) % 365]
     next_observation = self.prices[self.day]
 
     if(self.yesterday_in_state):
         if self.energy_in_state:
-            return np.concatenate((next_observation, np.concatenate((prev_price, self.prev_energy))))
+            return np.concatenate((next_observation, np.concatenate((prev_price, agent.prev_energy))))
         else:
             return np.concatenate((next_observation, prev_price))
 
     elif self.energy_in_state:
-        return np.concatenate((next_observation, self.prev_energy))
+        return np.concatenate((next_observation, agent.prev_energy))
 
     else:
         return next_observation
 
-def _get_reward_per_agent(self, agent, price, energy_consumptions, reward_function = "scaled_cost_distance"):
-    # TODO: finish massaging this
+def _get_reward_per_agent(self, agent, reward_function = "scaled_cost_distance"):
     """
     Purpose: Compute reward given price signal and energy consumption of the office
 
     Args:
-        Price: Price signal vector (10-dim)
+        agent: DeterministicFunctionPerson object, a simulated office worker
         Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage
 
     Returns:
-        Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg")
+        reward: scalar reward for this agent, computed from its energy use and the current price signal
     """
 
-    total_reward = 0
-    for player_name in energy_consumptions:
-        if player_name != "avg":
-            # get the points output from players
-            player = self.player_dict[player_name]
-
-            # get the reward from the player's output
-            player_min_demand = player.get_min_demand()
-            player_max_demand = player.get_max_demand()
-            player_energy = energy_consumptions[player_name]
-            player_reward = Reward(player_energy, price, player_min_demand, player_max_demand)
-
-            if reward_function == "scaled_cost_distance":
-                player_ideal_demands = player_reward.ideal_use_calculation()
-                reward = player_reward.scaled_cost_distance(player_ideal_demands)
+    price = self.prices[self.day]
+    # get the reward from the player's output
+    player_min_demand = agent.get_min_demand()
+    player_max_demand = agent.get_max_demand()
+    player_energy = agent.prev_energy
+    player_reward = Reward(player_energy, price, player_min_demand, player_max_demand)
 
-            elif reward_function == "log_cost_regularized":
-                reward = player_reward.log_cost_regularized()
+    if reward_function == "scaled_cost_distance":
+        player_ideal_demands = player_reward.ideal_use_calculation()
+        reward = player_reward.scaled_cost_distance(player_ideal_demands)
 
-            total_reward += reward
+    elif reward_function == "log_cost_regularized":
+        reward = player_reward.log_cost_regularized()
 
-    return total_reward
+    return reward
diff --git a/multiagent/agents.py b/multiagent/agents.py
new file mode 100644
index 000000000..84be92bd9
--- /dev/null
+++ b/multiagent/agents.py
@@ -0,0 +1,328 @@
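+# agents.py: simulated office occupants for the social game.
+# Each Person subclass below maps a 10-dim points/price signal (working hours, 8AM - 5PM)
+# to a 10-dim energy-consumption vector bounded by that person's min/max demand.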
+import pandas as pd +import numpy as np +import cvxpy as cvx +from sklearn.preprocessing import MinMaxScaler + +#### file to make the simulation of people that we can work with + + +class Person(): + """ Person (parent?) class -- will define how the person takes in a points signal and puts out an energy signal + baseline_energy = a list or dataframe of values. This is data from SinBerBEST + points_multiplier = an int which describes how sensitive each person is to points + + """ + + def __init__(self, baseline_energy_df, points_multiplier = 1): + self.baseline_energy_df = baseline_energy_df + self.baseline_energy = np.array(self.baseline_energy_df["net_energy_use"]) + self.points_multiplier = points_multiplier + + baseline_min = self.baseline_energy.min() + baseline_max = self.baseline_energy.max() + baseline_range = baseline_max - baseline_min + + self.min_demand = np.maximum(0, baseline_min + baseline_range * .05) + self.max_demand = np.maximum(0, baseline_min + baseline_range * .95) + self.silent = True + self.prev_energy = self.baseline_energy # basically a placeholder. + + + def energy_output_simple_linear(self, points): + """Determines the energy output of the person, based on the formula: + + y[n] = -sum_{rolling window of 5} points + baseline_energy + noise + + inputs: points - list or dataframe of points values. Assumes that the + list will be in the same time increment that energy_output will be. + + For now, that's in 1 hour increments + + """ + points_df = pd.DataFrame(points) + + points_effect = ( + points_df + .rolling( + window = 5, + min_periods = 1) + .mean() + ) + + + + time = points_effect.shape[0] + energy_output= [] + + for t in range(time): + temp_energy = self.baseline_energy[t] - points_effect.iloc[t]*self.points_multiplier + \ + np.random.normal(1) + energy_output.append(temp_energy) + + return pd.DataFrame(energy_output) + + def pure_linear_signal(self, points, baseline_day=0): + """ + A linear person. The more points you give them, the less energy they will use + (within some bounds) for each hour. No rolling effects or anything. The simplest + signal. 
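+        Example (illustrative numbers): with points_multiplier = 1 and
+        points = np.ones(10), each working hour's baseline load is reduced
+        by 1 unit, then clipped to [min_demand, max_demand].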
+ """ + + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + return output + + + + def get_min_demand(self): + return self.min_demand + # return np.quantile(self.baseline_energy, .05) + + def get_max_demand(self): + return self.max_demand + # return np.quantile(self.baseline_energy, .95) + +class FixedDemandPerson(Person): + + def __init__(self, baseline_energy_df, points_multiplier = 1): + super().__init__(baseline_energy_df, points_multiplier) + + + def demand_from_points(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + total_demand = np.sum(output) + + + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + + return output + + def adverserial_linear(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + total_demand = np.sum(output) + + + points_effect = np.array(points * self.points_multiplier) + output = output + points_effect + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + + return output + +class DeterministicFunctionPerson(Person): + + def __init__(self, baseline_energy_df, points_multiplier = 1, response = 't'): + super().__init__(baseline_energy_df, points_multiplier) + self.response = response + + def threshold_response_func(self, points): + points = np.array(points) * self.points_multiplier + threshold = np.mean(points) + return [p if p>threshold else 0 for p in points] + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p**2 for p in points] + + return points_effect + + def sin_response_func(self,points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i)*np.pi)*self.points_multiplier for i in points] + points = points + return points + + def linear_response_func(self,points): + return points * self.points_multiplier + + def routine_output_transform(self, points_effect, baseline_day=0, day_of_week = None): + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + total_demand = np.sum(output) + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output - points_effect + # output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + # output = np.maximum(output, self.min_demand) + # output = np.minimum(output, self.max_demand) + # return output + + + if(day_of_week != None): + energy_resp = energy_resp * self.day_of_week_multiplier[day_of_week] + + 
scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) + scaled_output = scaler.fit_transform(output.reshape(-1, 1)) + + return np.squeeze(scaled_output) + + + def threshold_response(self, points, day_of_week = None): + points_effect = self.threshold_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def sin_response(self, points, day_of_week = None): + points_effect = self.sin_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def exp_response(self, points, day_of_week = None): + points_effect = self.exponential_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def threshold_exp_response(self, points, day_of_week = None): + points_effect = self.exponential_response_func(points) + points_effect = self.threshold_response_func(points_effect) + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def linear_response(self, points, day_of_week = None): + points_effect = points*self.points_multiplier + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def get_response(self, points, day_of_week=None): + if(self.response == 't'): + energy_resp = self.threshold_exp_response(points, day_of_week = day_of_week) + elif(self.response == 's'): + energy_resp = self.sin_response(points, day_of_week = day_of_week) + elif(self.response == 'l'): + energy_resp = self.linear_response(points, day_of_week = day_of_week) + else: + raise NotImplementedError + + return energy_resp + +class RandomizedFunctionPerson(DeterministicFunctionPerson): + def __init__(self, baseline_energy_df, points_multiplier=1, response='t', low = 0, high = 50, distr = 'U'): + + """ + Adds Random Noise to DeterministicFunctionPerson energy output (for D.R. purposes) + + New Args: + Low = Lower bound for random noise added to energy use + High = Upper bound " " " " " " + Distr = 'G' for Gaussian noise, 'U' for Uniform random noise (Note: Continuous distr.) + + Note: For design purposes the random noise is updated at the end of each episode + """ + #TODO: Multivariate distr?? 
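+        # As noted above, self.noise is drawn via update_noise() and is intended to be
+        # refreshed at the end of each episode, so the added noise stays fixed within an episode.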
+ + super().__init__(baseline_energy_df, points_multiplier=points_multiplier, response=response) + + distr = distr.upper() + assert distr in ['G', 'U'] + + self.response = response + self.low = low + self.high = high if high < self.max_demand else 50 + self.distr = distr + + self.noise = [] + self.update_noise() + + def update_noise(self): + if(self.distr == 'G'): + #TODO: Update how to sample from Gausian + self.noise = np.random.normal(loc = (self.low + self.high) / 2, scale = 10, size = 10) + + elif(self.distr == 'U'): + self.noise = np.random.uniform(low = self.low, high=self.high, size = 10) + + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p**2 for p in points] + + return points_effect + self.noise + + def sin_response_func(self,points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i)*np.pi)*self.points_multiplier for i in points] + points = points + return points + self.noise + + def linear_response_func(self,points): + return points * self.points_multiplier + self.noise + + +# utkarsha's person + +class CurtailandShiftPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier = 1): + super().__init__(baseline_energy_df, points_multiplier) + self.shiftableLoadFraction = 0.1 + self.shiftByHours = 3 + self.curtailableLoadFraction = 0.1 + self.maxCurtailHours = 3 #Person willing to curtail for no more than these hours + + def shiftedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + points = np.array(points) * self.points_multiplier + shiftableLoad = self.shiftableLoadFraction*output + shiftByHours = self.shiftByHours + + # 10 hour day. Rearrange the sum of shiftableLoad into these hours by treating points as the 'price' at that hour + # Load can be shifted by a max of shiftByHours (default = 3 hours) + # For each hour, calculate the optimal hour to shift load to within +- 3 hours + shiftedLoad = np.zeros(10) + for hour in range(10): + candidatePrices = points[max(hour-shiftByHours,0): min(hour+shiftByHours,9)+1] + shiftToHour = max(hour-shiftByHours,0) + np.argmin(candidatePrices) + shiftedLoad[shiftToHour] += shiftableLoad[hour] + return shiftedLoad + + def curtailedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + points = np.array(points) * self.points_multiplier + curtailableLoad = self.curtailableLoadFraction*output + maxPriceHours = np.argsort(points)[0:self.maxCurtailHours] + for hour in maxPriceHours: + curtailableLoad[hour] = 0 + return curtailableLoad + + def get_response(self, points, day_of_week=None): + baseline_day = 0 + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + energy_resp = output*(1 - self.curtailableLoadFraction - self.shiftableLoadFraction) + self.curtailedLoad(points) + self.shiftedLoad(points) + + + self.min_demand = np.maximum(0, min(energy_resp)) + self.max_demand = np.maximum(0, max(energy_resp)) + + return energy_resp + + + diff --git a/multiagent/reward.py b/multiagent/reward.py new file mode 100644 index 000000000..d0dd724d6 --- /dev/null +++ b/multiagent/reward.py @@ -0,0 +1,202 @@ +import cvxpy as cvx +# import osqp +import numpy as np +from sklearn.preprocessing import MinMaxScaler + +#### file to calculate the rewards. 
Meant to be modular: +#### class Rewards should have several different functions by Dec 2019 + + +class Reward(): + def __init__(self, energy_use, prices, min_demand, max_demand): + """ + Args: + energy_use: list returned by Person class signifying energy use + prices: list returned by grid signifying cost throughout day + min_demand: value computed by Person class signifying minimum energy use long term + max_demand: value computed by Person class signifying maximum energy use long term + """ + + self.energy_use = np.array(energy_use) + self.prices = np.array(prices) + self._num_timesteps = energy_use.shape[0] + self.min_demand = np.min(energy_use) # min_demand + self.max_demand = np.max(energy_use) # max_demand + self.baseline_max_demand = 159.32 + + assert round(self.max_demand) == round(max_demand), "The max demand that the player is using and the optimization is using is not the same" + assert round(self.min_demand) == round(min_demand), "The min demand that the player is using and the optimization is using is not the same" + + self.total_demand = np.sum(energy_use) + + def ideal_use_calculation(self): + """ + Computes an optimization of demand according to price + + returns: np.array of ideal energy demands given a price signal + """ + + demands = cvx.Variable(self._num_timesteps) + min_demand = cvx.Parameter() + max_demand = cvx.Parameter() + total_demand = cvx.Parameter() + prices = cvx.Parameter(self._num_timesteps) + + min_demand = self.min_demand + max_demand = self.max_demand + total_demand = self.total_demand + + while (max_demand * 10 < total_demand): + print("multiplying demand to make optimization work") + print("max_demand: " + str(max_demand)) + print("total_demand: " + str(total_demand)) + print("energy_use") + print(self.energy_use) + max_demand *= 1.1 + + prices = self.prices + constraints = [cvx.sum(demands, axis=0, keepdims=True) == total_demand] + # constraints = [np.ones(self._num_timesteps).T * demands == total_demand] + for i in range(self._num_timesteps): + constraints += [demands[i] <= max_demand] + constraints += [min_demand <= demands[i]] + # if i != 0: + # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] + + + objective = cvx.Minimize(demands.T * prices) + problem = cvx.Problem(objective, constraints) + + problem.solve(solver = cvx.ECOS, verbose=False) + return np.array(demands.value) + + + def log_cost(self): + """ + Scales energy_use to be between min and max energy demands (this is repeated + in agent.routine_output_trasform), and then returns the simple total cost. + + """ + + scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) + scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + + return -np.log(np.dot(scaled_energy, self.prices)) + + def log_cost_regularized(self): + """ + Scales energy_use to be between min and max energy demands (this is repeated + in agent.routine_output_trasform), and then returns the simple total cost. + + :param: h - the hyperparameter that modifies the penalty on energy demand that's driven too low. + + """ + + scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) + ## ? 
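+        # Rescales raw energy use into [min_demand, max_demand] before pricing it,
+        # mirroring the scaling already applied in the agent's routine_output_transform.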
+ scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + + + + return (-np.log(np.dot(scaled_energy, self.prices)) - + 10 * (np.sum(self.energy_use) < (10 * (.5 * self.baseline_max_demand)))) # - + #10 * ()) + # sigmoid between 10 and 20 so there's a smooth transition + # - lambd * (difference b/w energy(t)) - put a bound on + # play with the lipschitz constant + # [10, 20, 10, 10, 10, ] + + + + + + def neg_distance_from_ideal(self, demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a numerical distance metric, negated + """ + + return -((demands - self.energy_use)**2).sum() + + def cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a cost-based distance metric, negated + """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + return cost_difference + + def log_cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + the log of the cost distance + """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + #TODO ENSURE THAT COST DIFFERENCE IS < 0 + if cost_difference < 0: + return -np.log(-cost_difference) + else: + print("WEIRD REWARD ALERT. IDEAL COST >= CURRENT COST. returning reward of 10") + return 10 + + def scaled_cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a cost-based distance metric normalized by total ideal cost + """ + + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # print("--" * 10) + # print("ideal cost") + # print(ideal_cost) + # print("--" * 10) + # print("prices") + # print(self.prices) + # print("--" * 10) + # print("current_cost") + # print(current_cost) + + if cost_difference > 0 or ideal_cost < 0: + print("--" * 10) + print("Problem with reward") + # print("min_demand: " + str(self.min_demand)) + # print("max_demand: " + str(self.max_demand)) + # print("--" * 10) + print("prices") + print(self.prices) + # print("current_cost") + # print(current_cost) + # print("--" * 10) + # print("ideal_cost") + # print(ideal_cost) + # print("ideal_demands") + # print(ideal_demands) + # print("energy demand") + # print(self.energy_use) + print("taking the neg abs value so that it stays the same sign.") + + return -np.abs(cost_difference/ideal_cost) diff --git a/multiagent/utils.py b/multiagent/utils.py new file mode 100644 index 000000000..19637d732 --- /dev/null +++ b/multiagent/utils.py @@ -0,0 +1,115 @@ +import csv +import numpy as np +import pandas as pd +from scipy.optimize import minimize +import os +import matplotlib.pyplot as plt +from sklearn.preprocessing import MinMaxScaler + +#data_path = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "building_data.csv") +# csv_path = os.path.dirname(os.path.realpath(__file__)) + "/building_data.csv" + + +def price_signal(day = 45, type_of_DR = "real_time_pricing"): + + """ + Utkarsha's work on price signal from a building with demand and solar + Input: Day = an int signifying a 24 hour period. 365 total, all of 2012, start at 1. 
+ Output: netdemand_price, a measure of how expensive energy is at each time in the day + optionally, we can return the optimized demand, which is the building + calculating where the net demand should be allocated + """ + csv_path = "building_data.csv" + csv_path_2 = "../gym-socialgame/gym_socialgame/envs/building_data.csv" + csv_path_3 = "/global/home/users/lucas_spangher/transactive_control/gym-socialgame/gym_socialgame/envs/building_data.csv" + try: + df = pd.read_csv(csv_path) + except: + try: + df = pd.read_csv(csv_path_2) + except: + df = pd.read_csv(csv_path_3) + + pv = 0.001*np.array(df['PV (W)'].tolist()) + price = np.array(df['Price( $ per kWh)'].tolist()) + demand = np.nan_to_num(np.array(df['Office_Elizabeth (kWh)'].tolist())) + demand_charge = 10/30 # 10$/kW per month + + pvsize = 0 #Assumption + netdemand = demand - pvsize*pv + + # Data starts at 5 am on Jan 1 + netdemand_24 = netdemand[24*day-5:24*day+19] + price_24 = price[24*day-5:24*day+19] + pv_24 = pv[24*day-5:24*day+19] + demand_24 = demand[24*day-5:24*day+19] + + # Calculate optimal load scheduling. 90% of load is fixed, 10% is controllable. + def optimise_24h(netdemand_24, price_24): + currentcost = netdemand_24@price_24 + + fixed_load = 0.9*netdemand_24 + controllable_load = sum(0.1*netdemand_24) + + def objective(x): + load = fixed_load + x + cost = np.multiply(price_24,load) + # Negative demand means zero cost, not negative cost + # Adding L1 regularisation to penalise shifting of occupant demand + lambd = 0.005 + # Demand charge: attached to peak demand + return sum(np.maximum(cost,0)) + demand_charge*max(load) + lambd*sum(abs(x-0.1*netdemand_24)) + + def constraint_sumofx(x): + return sum(x) - controllable_load + + def constraint_x_positive(x): + return x + + x0 = np.zeros(24) + cons = [ + {'type':'eq', 'fun': constraint_sumofx}, + {'type':'ineq', 'fun':constraint_x_positive} + ] + sol = minimize(objective, x0, constraints=cons) + return sol + + if type_of_DR == "real_time_pricing": + sol = optimise_24h(netdemand_24,price_24) + x = sol['x'] + diff = x - 0.1*netdemand_24 + return -diff - min(-diff) + + elif type_of_DR == "time_of_use": + if (np.mean(price_24[8:18]) == price_24[9]): + price_24[13:16]+=3 + return price_24 + else: + return "error!!!" 
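+
+# Shape of the Fourier action parameterization implemented below (illustrative sketch,
+# assuming fourier_basis_size = 3, i.e. a 5-dim action a = [a0, a1, b1, a2, b2]):
+#   root(t)   = a0/2 + a1*sin(2*pi*t) + b1*cos(2*pi*t) + a2*sin(4*pi*t) + b2*cos(4*pi*t)
+#   points(t) = clip(root(t)**2, 0, 10), with t on np.linspace(0, 1, points_length)
+# Squaring keeps the points non-negative; clipping keeps them within the 0-10 range.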
+ + +def fourier_points_from_action(action, points_length, fourier_basis_size): + assert fourier_basis_size == (action.size + 1) // 2, "Incorrect fourier basis size for actions" + root_points = (action[0]/2)*np.ones(points_length) + inp = np.linspace(0, 1, points_length) + for k in range(1, fourier_basis_size): + ak, bk = action[2*k - 1], action[2*k] + root_points += ak * np.sin(2*np.pi*k*inp) + bk * np.cos(2*np.pi*k*inp) + + points = root_points**2 + #TODO: More elegant solution than clipping + points = np.clip(points, 0, 10) + return points + + + + + + + + + + + + + From 3b7d558f3392e38e3ff50534a7c26b16720e41ee Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 9 Dec 2020 23:39:20 -0800 Subject: [PATCH 3/6] modifying imports --- multiagent/SG_environment.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py index a8d010b2f..3a9ab8b37 100644 --- a/multiagent/SG_environment.py +++ b/multiagent/SG_environment.py @@ -9,9 +9,10 @@ import tensorflow as tf -from gym_socialgame.envs.utils import price_signal, fourier_points_from_action -from gym_socialgame.envs.agents import * -from gym_socialgame.envs.reward import Reward +# manan these are in the same directory -- you may need to add that to the path, but prob not +from utils import price_signal, fourier_points_from_action +from agents import * +from reward import Reward import pickle From aa55ae628833c21654a591d1ca9019e392fe1158 Mon Sep 17 00:00:00 2001 From: Manan Khattar Date: Fri, 11 Dec 2020 17:22:52 -0800 Subject: [PATCH 4/6] 12/11 changes --- multiagent/SG_environment.py | 414 +++++++++++++++------------ multiagent/agents.py | 528 ++++++++++++++++++----------------- multiagent/reward.py | 271 +++++++++--------- multiagent/utils.py | 93 +++--- 4 files changed, 691 insertions(+), 615 deletions(-) diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py index 3a9ab8b37..de5e2e7dd 100644 --- a/multiagent/SG_environment.py +++ b/multiagent/SG_environment.py @@ -2,99 +2,102 @@ from gym import spaces from gym.envs.registration import EnvSpec import numpy as np +import pandas as pd from multiagent.multi_discrete import MultiDiscrete -import numpy as np + import random import tensorflow as tf -# manan these are in the same directory -- you may need to add that to the path, but prob not -from utils import price_signal, fourier_points_from_action -from agents import * -from reward import Reward +# manan these are in the same directory -- you may need to add that to the path, but prob not +from multiagent.utils import price_signal, fourier_points_from_action +from multiagent.agents import DeterministicFunctionPerson +from multiagent.reward import Reward import pickle # environment for all agents in the multiagent world # currently code assumes that no agents will be created/destroyed at runtime! class MultiAgentEnv(gym.Env): - metadata = { - 'render.modes' : ['human', 'rgb_array'] # ?? necc? - } + metadata = {"render.modes": ["human", "rgb_array"]} # ?? necc? 
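+    # "render.modes" follows the standard gym.Env metadata convention; rendering itself
+    # is a no-op for this environment (render() just passes).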
def __init__( - self, - action_space_string = "continuous", - response_type_string = "l", - number_of_participants = 10, - one_day = 0, - energy_in_state = False, - yesterday_in_state = False, - day_of_week = False, + self, + action_space_string="continuous", + response_type_string="l", + number_of_participants=10, + one_day=0, + energy_in_state=False, + yesterday_in_state=False, + day_of_week=False, pricing_type="TOU", - reward_function = "scaled_cost_distance", - fourier_basis_size = 10, - ): - - # self.world = world ## does world appear elsewhere - - player_dict = self._create_agents() - self.agents = player_dict # or transform however needed # world.policy_agents - self.n = len(self.agents) - - self.discrete_action_space = False - self.discrete_action_input = False - self.force_discrete_action = False - - self.shared_reward = False ## right? - self.time = 0 - - self.points_length = 10 - self.action_length = 10 - self.action_subspace = 3 - - self.action_space = [] - - - # (our inputs) - self.prev_energy = np.zeros(10) - self.one_day = self._find_one_day(one_day) - self.energy_in_state = energy_in_state - self.yesterday_in_state = yesterday_in_state - self.reward_function = reward_function - self.fourier_basis_size = fourier_basis_size - - self.day = 0 - self.days_of_week = [0, 1, 2, 3, 4] - self.day_of_week_flag = day_of_week - self.day_of_week = self.days_of_week[self.day % 5] - - #Create Observation Space (aka State Space) - self.observation_space = [] - - if pricing_type=="TOU": - self.pricing_type = "time_of_use" - elif pricing_type == "RTP": - self.pricing_type = "real_time_pricing" - else: - print("Wrong pricing type") - raise ValueError - - self.prices = self._get_prices() - - ## back to filling in their skeleton - - for agent in self.agents: - # action seq isn't continuous, so skipping Multidiscrete action - self.action_space.append(self._create_action_space()) - - obs_dim = 10 + 10 * self.energy_in_state + 10 * self.yesterday_in_state - self.observation_space.append(self._create_observation_space()) - - + reward_function="log_cost_regularized", + fourier_basis_size=10, + ): + + # self.world = world ## does world appear elsewhere + + self.number_of_participants = number_of_participants + self.response_type_string = response_type_string + self.action_space_string = action_space_string + + player_dict = self._create_agents() + self.agents = player_dict # or transform however needed # world.policy_agents + self.n = len(self.agents) + + self.discrete_action_space = False + self.discrete_action_input = False + self.force_discrete_action = False + + self.shared_reward = False ## right? 
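+        # step() returns one reward per agent (no summing across agents),
+        # so shared_reward stays False here.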
+ self.time = 0 + + self.curr_iter = 0 + + self.points_length = 10 + self.action_length = 10 + self.action_subspace = 3 + + self.action_space = [] + + # (our inputs) + self.prev_energy = np.zeros(10) + self.one_day = self._find_one_day(one_day) + self.energy_in_state = energy_in_state + self.yesterday_in_state = yesterday_in_state + self.reward_function = reward_function + self.fourier_basis_size = fourier_basis_size + + self.day = 0 + self.days_of_week = [0, 1, 2, 3, 4] + self.day_of_week_flag = day_of_week + self.day_of_week = self.days_of_week[self.day % 5] + + # Create Observation Space (aka State Space) + self.observation_space = [] + + if pricing_type == "TOU": + self.pricing_type = "time_of_use" + elif pricing_type == "RTP": + self.pricing_type = "real_time_pricing" + else: + print("Wrong pricing type") + raise ValueError + + self.prices = self._get_prices() + + ## back to filling in their skeleton + + for agent in self.agents: + # action seq isn't continuous, so skipping Multidiscrete action + self.action_space.append(self._create_action_space()) + + obs_dim = 10 + 10 * self.energy_in_state + 10 * self.yesterday_in_state + self.observation_space.append(self._create_observation_space()) + def _find_one_day(self, one_day: int): - """ + """ Purpose: Helper function to find one_day to train on (if applicable) Args: @@ -105,16 +108,15 @@ def _find_one_day(self, one_day: int): one_day if one_day in range [1,365] random_number(1,365) if one_day = -1 """ - - print("one_day") - print(one_day) - # if(one_day != -1): - # return np.random.randint(0, high=365) - - # else: - return one_day + print("one_day") + print(one_day) + # if(one_day != -1): + # return np.random.randint(0, high=365) + + # else: + return one_day def _create_action_space(self): """ @@ -130,9 +132,11 @@ def _create_action_space(self): We pose this option to test whether simplifying the action-space helps the agent. """ - #Making a symmetric, continuous space to help learning for continuous control (suggested in StableBaselines doc.) + # Making a symmetric, continuous space to help learning for continuous control (suggested in StableBaselines doc.) if self.action_space_string == "continuous": - return spaces.Box(low=-1, high=1, shape=(self.action_length,), dtype=np.float32) + return spaces.Box( + low=-1, high=1, shape=(self.action_length,), dtype=np.float32 + ) elif self.action_space_string == "multidiscrete": discrete_space = [self.action_subspace] * self.action_length @@ -140,10 +144,12 @@ def _create_action_space(self): elif self.action_space_string == "fourier": return spaces.Box( - low=-2, high=2, shape=(2*self.fourier_basis_size - 1,), dtype=np.float32 + low=-2, + high=2, + shape=(2 * self.fourier_basis_size - 1,), + dtype=np.float32, ) - def _create_agents(self): """ Purpose: Create the participants of the social game. We create a game with n players, where n = self.number_of_participants @@ -158,51 +164,87 @@ def _create_agents(self): player_dict = {} - # Sample Energy from average energy in the office (pre-treatment) from the last experiment + # Sample Energy from average energy in the office (pre-treatment) from the last experiment # Reference: Lucas Spangher, et al. Engineering vs. ambient typevisualizations: Quantifying effects of different data visualizations on energy consumption. 
2019 - - sample_energy = np.array([ 0.28, 11.9, 16.34, 16.8, 17.43, 16.15, 16.23, 15.88, 15.09, 35.6, - 123.5, 148.7, 158.49, 149.13, 159.32, 157.62, 158.8, 156.49, 147.04, 70.76, - 42.87, 23.13, 22.52, 16.8 ]) - #only grab working hours (8am - 5pm) + sample_energy = np.array( + [ + 0.28, + 11.9, + 16.34, + 16.8, + 17.43, + 16.15, + 16.23, + 15.88, + 15.09, + 35.6, + 123.5, + 148.7, + 158.49, + 149.13, + 159.32, + 157.62, + 158.8, + 156.49, + 147.04, + 70.76, + 42.87, + 23.13, + 22.52, + 16.8, + ] + ) + + # only grab working hours (8am - 5pm) working_hour_energy = sample_energy[8:18] - my_baseline_energy = pd.DataFrame(data = {"net_energy_use" : working_hour_energy}) - + my_baseline_energy = pd.DataFrame(data={"net_energy_use": working_hour_energy}) for i in range(self.number_of_participants): - player = DeterministicFunctionPerson(my_baseline_energy, points_multiplier = 10, response= self.response_type_string) #, ) - player_dict['player_{}'.format(i)] = player + player = DeterministicFunctionPerson( + my_baseline_energy, + points_multiplier=10, + response=self.response_type_string, + ) # , ) + player_dict["player_{}".format(i)] = player return player_dict -def _create_observation_space(self): - """ - Purpose: Returns the observation space. - If the state space includes yesterday, then it is +10 dim for yesterday's price signal - If the state space includes energy_in_state, then it is +10 dim for yesterday's energy + def _create_observation_space(self): + """ + Purpose: Returns the observation space. + If the state space includes yesterday, then it is +10 dim for yesterday's price signal + If the state space includes energy_in_state, then it is +10 dim for yesterday's energy - Args: - None + Args: + None - Returns: - Action Space for environment based on action_space_str - """ + Returns: + Action Space for environment based on action_space_str + """ - #TODO: Normalize obs_space ! - if(self.yesterday_in_state): - if(self.energy_in_state): - return spaces.Box(low=-np.inf, high=np.inf, shape=(30,), dtype=np.float32) - else: - return spaces.Box(low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32) + # TODO: Normalize obs_space ! 
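# Editor's note (illustrative sketch, not part of the patch): the branches below
# encode obs_dim = 10 + 10*yesterday_in_state + 10*energy_in_state -- the base
# 10-hour price signal, plus 10 dims for yesterday's prices and 10 for yesterday's
# energy when the corresponding flag is set. A compact equivalent, assuming
# `spaces` and `np` are the gym.spaces / numpy imports used above:
def _observation_space_sketch(yesterday_in_state, energy_in_state):
    obs_dim = 10 + 10 * int(yesterday_in_state) + 10 * int(energy_in_state)
    return spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)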
+ if self.yesterday_in_state: + if self.energy_in_state: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(30,), dtype=np.float32 + ) + else: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32 + ) - else: - if self.energy_in_state: - return spaces.Box(low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32) else: - return spaces.Box(low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32) + if self.energy_in_state: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32 + ) + else: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32 + ) -def _get_prices(self): + def _get_prices(self): """ Purpose: Get grid price signals for the entire year (using past data from a building in Los Angeles as reference) @@ -213,10 +255,7 @@ def _get_prices(self): """ all_prices = [] - print("--" * 10) - print(self.one_day) - print("--" * 10) - + type_of_DR = self.pricing_type if self.one_day != -1: @@ -224,82 +263,85 @@ def _get_prices(self): # Tweak One_Day Price Signal HERE price = price_signal(self.one_day, type_of_DR=type_of_DR) price = np.array(price[8:18]) - if np.mean(price)==price[2]: - price[3:6]+=.3 + if np.mean(price) == price[2]: + price[3:6] += 0.3 price = np.maximum(0.01 * np.ones_like(price), price) for i in range(365): all_prices.append(price) else: day = 0 - for i in range(365): + for i in range(365): price = price_signal(day + 1, type_of_DR=type_of_DR) price = np.array(price[8:18]) # put a floor on the prices so we don't have negative prices - if np.mean(price)==price[2]: - price[3:6]+=.3 + if np.mean(price) == price[2]: + price[3:6] += 0.3 price = np.maximum(0.01 * np.ones_like(price), price) all_prices.append(price) day += 1 return np.array(all_prices) -def _simulate_human(self, action, agent): + def _simulate_human(self, action, agent): """ - Purpose: Gets energy consumption from players given action from agent + Purpose: Gets energy consumption from players given action from agent - Args: - Action: 10-dim vector corresponding to action for each hour 8AM - 5PM - - Returns: - Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") - """ + Args: + Action: 10-dim vector corresponding to action for each hour 8AM - 5PM + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ - #Get players response to agent's actions + # Get players response to agent's actions player = agent - if (self.day_of_week_flag): - player_energy = player.get_response(action, day_of_week = self.day_of_week) - else: - player_energy = player.get_response(action, day_of_week = None) + if self.day_of_week_flag: + player_energy = player.get_response(action, day_of_week=self.day_of_week) + else: + player_energy = player.get_response(action, day_of_week=None) + + # Calculate energy consumption by player and in total (over the office) - #Calculate energy consumption by player and in total (over the office) - return player_energy + def step(self, action_n): + obs_n = [] + reward_n = [] + done_n = [] + info_n = {"n": []} -def step(self, action_n): - obs_n = [] - reward_n = [] - done_n = [] - info_n = {"n": []} - - prev_price = self.prices[(self.day)] - self.day = (self.day + 1) % 365 - self.curr_iter += 1 - - if self.curr_iter > 0: - done = True - self.curr_iter = 0 - else: - done = False + prev_price = self.prices[(self.day)] + self.day = (self.day + 1) % 365 + self.curr_iter += 1 - for i, 
agent in enumerate(self.agents): - agent.prev_energy = self._simulate_human(action_n[i], agent) - obs_n.append(self._get_observation_per_agent(agent)) - reward_n.append(self._get_reward_per_agent(agent)) - done_n.append(done) - info_n["n"].append({}) ## this was left as a formality in the stableBaselines, too + if self.curr_iter > 0: + done = True + self.curr_iter = 0 + else: + done = False + + for i, agent in enumerate(self.agents.values()): + agent.prev_energy = self._simulate_human(action_n[i], agent) + obs_n.append(self._get_observation_per_agent(agent)) + reward_n.append(self._get_reward_per_agent(agent)) + done_n.append(done) + info_n["n"].append( + {} + ) ## this was left as a formality in the stableBaselines, too - return obs_n, reward_n, done_n, info_n + return obs_n, reward_n, done_n, info_n -def _get_observation_per_agent(self, agent): - prev_price = self.prices[ (self.day - 1) % 365] + def _get_observation_per_agent(self, agent): + prev_price = self.prices[(self.day - 1) % 365] next_observation = self.prices[self.day] - if(self.yesterday_in_state): + if self.yesterday_in_state: if self.energy_in_state: - return np.concatenate((next_observation, np.concatenate((prev_price, agent.prev_energy)))) + return np.concatenate( + (next_observation, np.concatenate((prev_price, agent.prev_energy))) + ) else: return np.concatenate((next_observation, prev_price)) @@ -309,34 +351,36 @@ def _get_observation_per_agent(self, agent): else: return next_observation -def _get_reward_per_agent(self, agent, reward_function = "scaled_cost_distance"): + def _get_reward_per_agent(self, agent, reward_function="scaled_cost_distance"): """ - Purpose: Compute reward given price signal and energy consumption of the office + Purpose: Compute reward given price signal and energy consumption of the office - Args: - agent: DeterministicFunctionPerson object, simulated office twerker - Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage - - Returns: - Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") - """ + Args: + agent: DeterministicFunctionPerson object, simulated office twerker + Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ price = self.prices[self.day] # get the reward from the player's output player_min_demand = agent.get_min_demand() player_max_demand = agent.get_max_demand() player_energy = agent.prev_energy - player_reward = Reward(player_energy, price, player_min_demand, player_max_demand) + player_reward = Reward( + player_energy, price, player_min_demand, player_max_demand + ) if reward_function == "scaled_cost_distance": player_ideal_demands = player_reward.ideal_use_calculation() reward = player_reward.scaled_cost_distance(player_ideal_demands) - elif reward_function == "log_cost_regularized": reward = player_reward.log_cost_regularized() + else: + raise Exception("reward_function not defined") + return reward - return total_reward - def reset(self): # reset world obs_n = [] @@ -344,8 +388,8 @@ def reset(self): obs_n.append(self._get_observation_per_agent(agent)) return obs_n - def render(self, mode='human'): + def render(self, mode="human"): pass def close(self): - pass \ No newline at end of file + pass diff --git a/multiagent/agents.py 
b/multiagent/agents.py index 84be92bd9..f7bfbd1fa 100644 --- a/multiagent/agents.py +++ b/multiagent/agents.py @@ -1,35 +1,34 @@ import pandas as pd import numpy as np -import cvxpy as cvx from sklearn.preprocessing import MinMaxScaler +import random -#### file to make the simulation of people that we can work with +#### file to make the simulation of people that we can work with -class Person(): - """ Person (parent?) class -- will define how the person takes in a points signal and puts out an energy signal +class Person: + """ Person (parent?) class -- will define how the person takes in a points signal and puts out an energy signal baseline_energy = a list or dataframe of values. This is data from SinBerBEST points_multiplier = an int which describes how sensitive each person is to points """ - def __init__(self, baseline_energy_df, points_multiplier = 1): - self.baseline_energy_df = baseline_energy_df - self.baseline_energy = np.array(self.baseline_energy_df["net_energy_use"]) - self.points_multiplier = points_multiplier - - baseline_min = self.baseline_energy.min() - baseline_max = self.baseline_energy.max() - baseline_range = baseline_max - baseline_min - - self.min_demand = np.maximum(0, baseline_min + baseline_range * .05) - self.max_demand = np.maximum(0, baseline_min + baseline_range * .95) - self.silent = True - self.prev_energy = self.baseline_energy # basically a placeholder. + def __init__(self, baseline_energy_df, points_multiplier=1): + self.baseline_energy_df = baseline_energy_df + self.baseline_energy = np.array(self.baseline_energy_df["net_energy_use"]) + self.points_multiplier = points_multiplier + baseline_min = self.baseline_energy.min() + baseline_max = self.baseline_energy.max() + baseline_range = baseline_max - baseline_min - def energy_output_simple_linear(self, points): - """Determines the energy output of the person, based on the formula: + self.min_demand = np.maximum(0, baseline_min + baseline_range * 0.05) + self.max_demand = np.maximum(0, baseline_min + baseline_range * 0.95) + self.silent = True + self.prev_energy = self.baseline_energy # basically a placeholder. + + def energy_output_simple_linear(self, points): + """Determines the energy output of the person, based on the formula: y[n] = -sum_{rolling window of 5} points + baseline_energy + noise @@ -39,194 +38,210 @@ def energy_output_simple_linear(self, points): For now, that's in 1 hour increments """ - points_df = pd.DataFrame(points) - - points_effect = ( - points_df - .rolling( - window = 5, - min_periods = 1) - .mean() - ) + points_df = pd.DataFrame(points) + points_effect = points_df.rolling(window=5, min_periods=1).mean() + time = points_effect.shape[0] + energy_output = [] - time = points_effect.shape[0] - energy_output= [] + for t in range(time): + temp_energy = ( + self.baseline_energy[t] + - points_effect.iloc[t] * self.points_multiplier + + np.random.normal(1) + ) + energy_output.append(temp_energy) - for t in range(time): - temp_energy = self.baseline_energy[t] - points_effect.iloc[t]*self.points_multiplier + \ - np.random.normal(1) - energy_output.append(temp_energy) - - return pd.DataFrame(energy_output) + return pd.DataFrame(energy_output) - def pure_linear_signal(self, points, baseline_day=0): - """ + def pure_linear_signal(self, points, baseline_day=0): + """ A linear person. The more points you give them, the less energy they will use (within some bounds) for each hour. No rolling effects or anything. The simplest signal. 
""" - # hack here to always grab the first day from the baseline_energy - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - - points_effect = np.array(points * self.points_multiplier) - output = output - points_effect + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] - # impose bounds/constraints - output = np.maximum(output, self.min_demand) - output = np.minimum(output, self.max_demand) - return output + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + return output + def get_min_demand(self): + return self.min_demand + # return np.quantile(self.baseline_energy, .05) - def get_min_demand(self): - return self.min_demand - # return np.quantile(self.baseline_energy, .05) + def get_max_demand(self): + return self.max_demand + # return np.quantile(self.baseline_energy, .95) - def get_max_demand(self): - return self.max_demand - # return np.quantile(self.baseline_energy, .95) class FixedDemandPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1): + super().__init__(baseline_energy_df, points_multiplier) - def __init__(self, baseline_energy_df, points_multiplier = 1): - super().__init__(baseline_energy_df, points_multiplier) - - - def demand_from_points(self, points, baseline_day=0): - # hack here to always grab the first day from the baseline_energy - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - total_demand = np.sum(output) + def demand_from_points(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect - points_effect = np.array(points * self.points_multiplier) - output = output - points_effect + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand / np.sum(output)) - # scale to keep total_demand (almost) constant - # almost bc imposing bounds afterwards - output = output * (total_demand/np.sum(output)) + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) - # impose bounds/constraints - output = np.maximum(output, self.min_demand) - output = np.minimum(output, self.max_demand) + return output - return output + def adverserial_linear(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) - def adverserial_linear(self, points, baseline_day=0): - # hack here to always grab the first day from the baseline_energy - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - total_demand = np.sum(output) + points_effect = np.array(points * self.points_multiplier) + output = output + points_effect + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand / np.sum(output)) - points_effect = np.array(points * self.points_multiplier) - output = output + points_effect + # impose bounds/constraints + 
output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) - # scale to keep total_demand (almost) constant - # almost bc imposing bounds afterwards - output = output * (total_demand/np.sum(output)) + return output - # impose bounds/constraints - output = np.maximum(output, self.min_demand) - output = np.minimum(output, self.max_demand) - - return output class DeterministicFunctionPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1, response="t"): + super().__init__(baseline_energy_df, points_multiplier) + self.response = response + + def threshold_response_func(self, points): + points = np.array(points) * self.points_multiplier + threshold = np.mean(points) + return [p if p > threshold else 0 for p in points] + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p ** 2 for p in points] + + return points_effect + + def sin_response_func(self, points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i) * np.pi) * self.points_multiplier for i in points] + points = points + return points + + def linear_response_func(self, points): + return points * self.points_multiplier + + def routine_output_transform(self, points_effect, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output - points_effect + # output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + # output = np.maximum(output, self.min_demand) + # output = np.minimum(output, self.max_demand) + # return output + + if day_of_week != None: + energy_resp = energy_resp * self.day_of_week_multiplier[day_of_week] + + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + scaled_output = scaler.fit_transform(output.reshape(-1, 1)) + + return np.squeeze(scaled_output) + + def threshold_response(self, points, day_of_week=None): + points_effect = self.threshold_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def sin_response(self, points, day_of_week=None): + points_effect = self.sin_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def exp_response(self, points, day_of_week=None): + points_effect = self.exponential_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def threshold_exp_response(self, points, day_of_week=None): + points_effect = self.exponential_response_func(points) + points_effect = self.threshold_response_func(points_effect) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def linear_response(self, points, day_of_week=None): + points_effect = points * self.points_multiplier + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def get_response(self, points, day_of_week=None): + if self.response == "t": + energy_resp = self.threshold_exp_response(points, day_of_week=day_of_week) + elif self.response == "s": + energy_resp = self.sin_response(points, day_of_week=day_of_week) + elif self.response == "l": + energy_resp = self.linear_response(points, 
day_of_week=day_of_week) + elif self.response == "m": + draw = random.randint(Start=0, Stop=2) + if draw == 0: + energy_resp = self.threshold_exp_response( + points, day_of_week=day_of_week + ) + elif draw == 1: + energy_resp = self.sin_response(points, day_of_week=day_of_week) + else: + energy_resp = self.linear_response(points, day_of_week=day_of_week) + energy_resp = self.mixed_response(points, day_of_week=day_of_week) + else: + raise NotImplementedError + + return energy_resp - def __init__(self, baseline_energy_df, points_multiplier = 1, response = 't'): - super().__init__(baseline_energy_df, points_multiplier) - self.response = response - - def threshold_response_func(self, points): - points = np.array(points) * self.points_multiplier - threshold = np.mean(points) - return [p if p>threshold else 0 for p in points] - - def exponential_response_func(self, points): - points = np.array(points) * self.points_multiplier - points_effect = [p**2 for p in points] - - return points_effect - - def sin_response_func(self,points): - points = np.array(points) - # n = np.max(points) - # points = [np.sin((float(i)/float(n))*np.pi) for i in points] - points = [np.sin(float(i)*np.pi)*self.points_multiplier for i in points] - points = points - return points - - def linear_response_func(self,points): - return points * self.points_multiplier - - def routine_output_transform(self, points_effect, baseline_day=0, day_of_week = None): - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - total_demand = np.sum(output) - - # scale to keep total_demand (almost) constant - # almost bc imposing bounds afterwards - output = output - points_effect - # output = output * (total_demand/np.sum(output)) - - # impose bounds/constraints - # output = np.maximum(output, self.min_demand) - # output = np.minimum(output, self.max_demand) - # return output - - - if(day_of_week != None): - energy_resp = energy_resp * self.day_of_week_multiplier[day_of_week] - - scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) - scaled_output = scaler.fit_transform(output.reshape(-1, 1)) - - return np.squeeze(scaled_output) - - - def threshold_response(self, points, day_of_week = None): - points_effect = self.threshold_response_func(points) - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def sin_response(self, points, day_of_week = None): - points_effect = self.sin_response_func(points) - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def exp_response(self, points, day_of_week = None): - points_effect = self.exponential_response_func(points) - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def threshold_exp_response(self, points, day_of_week = None): - points_effect = self.exponential_response_func(points) - points_effect = self.threshold_response_func(points_effect) - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def linear_response(self, points, day_of_week = None): - points_effect = points*self.points_multiplier - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def get_response(self, points, day_of_week=None): - if(self.response == 't'): - energy_resp = self.threshold_exp_response(points, day_of_week = day_of_week) - elif(self.response == 's'): - energy_resp = self.sin_response(points, day_of_week = day_of_week) - 
elif(self.response == 'l'): - energy_resp = self.linear_response(points, day_of_week = day_of_week) - else: - raise NotImplementedError - - return energy_resp class RandomizedFunctionPerson(DeterministicFunctionPerson): - def __init__(self, baseline_energy_df, points_multiplier=1, response='t', low = 0, high = 50, distr = 'U'): - - """ + def __init__( + self, + baseline_energy_df, + points_multiplier=1, + response="t", + low=0, + high=50, + distr="U", + ): + + """ Adds Random Noise to DeterministicFunctionPerson energy output (for D.R. purposes) New Args: @@ -236,93 +251,108 @@ def __init__(self, baseline_energy_df, points_multiplier=1, response='t', low = Note: For design purposes the random noise is updated at the end of each episode """ - #TODO: Multivariate distr?? + # TODO: Multivariate distr?? - super().__init__(baseline_energy_df, points_multiplier=points_multiplier, response=response) - - distr = distr.upper() - assert distr in ['G', 'U'] + super().__init__( + baseline_energy_df, points_multiplier=points_multiplier, response=response + ) - self.response = response - self.low = low - self.high = high if high < self.max_demand else 50 - self.distr = distr + distr = distr.upper() + assert distr in ["G", "U"] - self.noise = [] - self.update_noise() + self.response = response + self.low = low + self.high = high if high < self.max_demand else 50 + self.distr = distr - def update_noise(self): - if(self.distr == 'G'): - #TODO: Update how to sample from Gausian - self.noise = np.random.normal(loc = (self.low + self.high) / 2, scale = 10, size = 10) - - elif(self.distr == 'U'): - self.noise = np.random.uniform(low = self.low, high=self.high, size = 10) + self.noise = [] + self.update_noise() + def update_noise(self): + if self.distr == "G": + # TODO: Update how to sample from Gausian + self.noise = np.random.normal( + loc=(self.low + self.high) / 2, scale=10, size=10 + ) - def exponential_response_func(self, points): - points = np.array(points) * self.points_multiplier - points_effect = [p**2 for p in points] + elif self.distr == "U": + self.noise = np.random.uniform(low=self.low, high=self.high, size=10) - return points_effect + self.noise + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p ** 2 for p in points] - def sin_response_func(self,points): - points = np.array(points) - # n = np.max(points) - # points = [np.sin((float(i)/float(n))*np.pi) for i in points] - points = [np.sin(float(i)*np.pi)*self.points_multiplier for i in points] - points = points - return points + self.noise - - def linear_response_func(self,points): - return points * self.points_multiplier + self.noise + return points_effect + self.noise + def sin_response_func(self, points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i) * np.pi) * self.points_multiplier for i in points] + points = points + return points + self.noise -# utkarsha's person + def linear_response_func(self, points): + return points * self.points_multiplier + self.noise -class CurtailandShiftPerson(Person): - def __init__(self, baseline_energy_df, points_multiplier = 1): - super().__init__(baseline_energy_df, points_multiplier) - self.shiftableLoadFraction = 0.1 - self.shiftByHours = 3 - self.curtailableLoadFraction = 0.1 - self.maxCurtailHours = 3 #Person willing to curtail for no more than these hours - - def shiftedLoad(self, points, baseline_day=0, day_of_week=None): - output = 
np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - points = np.array(points) * self.points_multiplier - shiftableLoad = self.shiftableLoadFraction*output - shiftByHours = self.shiftByHours - - # 10 hour day. Rearrange the sum of shiftableLoad into these hours by treating points as the 'price' at that hour - # Load can be shifted by a max of shiftByHours (default = 3 hours) - # For each hour, calculate the optimal hour to shift load to within +- 3 hours - shiftedLoad = np.zeros(10) - for hour in range(10): - candidatePrices = points[max(hour-shiftByHours,0): min(hour+shiftByHours,9)+1] - shiftToHour = max(hour-shiftByHours,0) + np.argmin(candidatePrices) - shiftedLoad[shiftToHour] += shiftableLoad[hour] - return shiftedLoad - - def curtailedLoad(self, points, baseline_day=0, day_of_week=None): - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - points = np.array(points) * self.points_multiplier - curtailableLoad = self.curtailableLoadFraction*output - maxPriceHours = np.argsort(points)[0:self.maxCurtailHours] - for hour in maxPriceHours: - curtailableLoad[hour] = 0 - return curtailableLoad - - def get_response(self, points, day_of_week=None): - baseline_day = 0 - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - energy_resp = output*(1 - self.curtailableLoadFraction - self.shiftableLoadFraction) + self.curtailedLoad(points) + self.shiftedLoad(points) - - - self.min_demand = np.maximum(0, min(energy_resp)) - self.max_demand = np.maximum(0, max(energy_resp)) - return energy_resp +# utkarsha's person +class CurtailandShiftPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1): + super().__init__(baseline_energy_df, points_multiplier) + self.shiftableLoadFraction = 0.1 + self.shiftByHours = 3 + self.curtailableLoadFraction = 0.1 + self.maxCurtailHours = ( + 3 # Person willing to curtail for no more than these hours + ) + + def shiftedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + points = np.array(points) * self.points_multiplier + shiftableLoad = self.shiftableLoadFraction * output + shiftByHours = self.shiftByHours + + # 10 hour day. 
Rearrange the sum of shiftableLoad into these hours by treating points as the 'price' at that hour + # Load can be shifted by a max of shiftByHours (default = 3 hours) + # For each hour, calculate the optimal hour to shift load to within +- 3 hours + shiftedLoad = np.zeros(10) + for hour in range(10): + candidatePrices = points[ + max(hour - shiftByHours, 0) : min(hour + shiftByHours, 9) + 1 + ] + shiftToHour = max(hour - shiftByHours, 0) + np.argmin(candidatePrices) + shiftedLoad[shiftToHour] += shiftableLoad[hour] + return shiftedLoad + + def curtailedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + points = np.array(points) * self.points_multiplier + curtailableLoad = self.curtailableLoadFraction * output + maxPriceHours = np.argsort(points)[0 : self.maxCurtailHours] + for hour in maxPriceHours: + curtailableLoad[hour] = 0 + return curtailableLoad + + def get_response(self, points, day_of_week=None): + baseline_day = 0 + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + energy_resp = ( + output * (1 - self.curtailableLoadFraction - self.shiftableLoadFraction) + + self.curtailedLoad(points) + + self.shiftedLoad(points) + ) + + self.min_demand = np.maximum(0, min(energy_resp)) + self.max_demand = np.maximum(0, max(energy_resp)) + + return energy_resp diff --git a/multiagent/reward.py b/multiagent/reward.py index d0dd724d6..0b09bea35 100644 --- a/multiagent/reward.py +++ b/multiagent/reward.py @@ -1,15 +1,16 @@ import cvxpy as cvx + # import osqp import numpy as np from sklearn.preprocessing import MinMaxScaler -#### file to calculate the rewards. Meant to be modular: +#### file to calculate the rewards. Meant to be modular: #### class Rewards should have several different functions by Dec 2019 -class Reward(): - def __init__(self, energy_use, prices, min_demand, max_demand): - """ +class Reward: + def __init__(self, energy_use, prices, min_demand, max_demand): + """ Args: energy_use: list returned by Person class signifying energy use prices: list returned by grid signifying cost throughout day @@ -17,74 +18,76 @@ def __init__(self, energy_use, prices, min_demand, max_demand): max_demand: value computed by Person class signifying maximum energy use long term """ - self.energy_use = np.array(energy_use) - self.prices = np.array(prices) - self._num_timesteps = energy_use.shape[0] - self.min_demand = np.min(energy_use) # min_demand - self.max_demand = np.max(energy_use) # max_demand - self.baseline_max_demand = 159.32 + self.energy_use = np.array(energy_use) + self.prices = np.array(prices) + self._num_timesteps = energy_use.shape[0] + self.min_demand = np.min(energy_use) # min_demand + self.max_demand = np.max(energy_use) # max_demand + self.baseline_max_demand = 159.32 - assert round(self.max_demand) == round(max_demand), "The max demand that the player is using and the optimization is using is not the same" - assert round(self.min_demand) == round(min_demand), "The min demand that the player is using and the optimization is using is not the same" + assert round(self.max_demand) == round( + max_demand + ), "The max demand that the player is using and the optimization is using is not the same" + assert round(self.min_demand) == round( + min_demand + ), "The min demand that the player is using and the optimization is using is not the same" - self.total_demand = np.sum(energy_use) + self.total_demand = np.sum(energy_use) - def 
ideal_use_calculation(self): - """ + def ideal_use_calculation(self): + """ Computes an optimization of demand according to price returns: np.array of ideal energy demands given a price signal """ - demands = cvx.Variable(self._num_timesteps) - min_demand = cvx.Parameter() - max_demand = cvx.Parameter() - total_demand = cvx.Parameter() - prices = cvx.Parameter(self._num_timesteps) - - min_demand = self.min_demand - max_demand = self.max_demand - total_demand = self.total_demand - - while (max_demand * 10 < total_demand): - print("multiplying demand to make optimization work") - print("max_demand: " + str(max_demand)) - print("total_demand: " + str(total_demand)) - print("energy_use") - print(self.energy_use) - max_demand *= 1.1 - - prices = self.prices - constraints = [cvx.sum(demands, axis=0, keepdims=True) == total_demand] - # constraints = [np.ones(self._num_timesteps).T * demands == total_demand] - for i in range(self._num_timesteps): - constraints += [demands[i] <= max_demand] - constraints += [min_demand <= demands[i]] - # if i != 0: - # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] - - - objective = cvx.Minimize(demands.T * prices) - problem = cvx.Problem(objective, constraints) - - problem.solve(solver = cvx.ECOS, verbose=False) - return np.array(demands.value) - - - def log_cost(self): - """ + demands = cvx.Variable(self._num_timesteps) + min_demand = cvx.Parameter() + max_demand = cvx.Parameter() + total_demand = cvx.Parameter() + prices = cvx.Parameter(self._num_timesteps) + + min_demand = self.min_demand + max_demand = self.max_demand + total_demand = self.total_demand + + while max_demand * 10 < total_demand: + print("multiplying demand to make optimization work") + print("max_demand: " + str(max_demand)) + print("total_demand: " + str(total_demand)) + print("energy_use") + print(self.energy_use) + max_demand *= 1.1 + + prices = self.prices + constraints = [cvx.sum(demands, axis=0, keepdims=True) == total_demand] + # constraints = [np.ones(self._num_timesteps).T * demands == total_demand] + for i in range(self._num_timesteps): + constraints += [demands[i] <= max_demand] + constraints += [min_demand <= demands[i]] + # if i != 0: + # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] + + objective = cvx.Minimize(demands.T * prices) + problem = cvx.Problem(objective, constraints) + + problem.solve(solver=cvx.ECOS, verbose=False) + return np.array(demands.value) + + def log_cost(self): + """ Scales energy_use to be between min and max energy demands (this is repeated in agent.routine_output_trasform), and then returns the simple total cost. """ - scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) - scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) - return -np.log(np.dot(scaled_energy, self.prices)) + return -np.log(np.dot(scaled_energy, self.prices)) - def log_cost_regularized(self): - """ + def log_cost_regularized(self): + """ Scales energy_use to be between min and max energy demands (this is repeated in agent.routine_output_trasform), and then returns the simple total cost. @@ -92,26 +95,21 @@ def log_cost_regularized(self): """ - scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) - ## ? 
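# Editor's note (illustrative sketch, not part of the patch): the rewrite of
# log_cost_regularized just below returns the negative log of the day's bill
# (after rescaling energy into [min_demand, max_demand]) minus a flat penalty of
# 10 whenever total consumption falls under half the baseline peak of 159.32 kWh
# times the 10-hour day, so an agent cannot score well by simply suppressing all
# demand. Standalone form of that expression, assuming numpy as np:
def log_cost_regularized_sketch(scaled_energy, prices, energy_use,
                                baseline_max_demand=159.32):
    bill = np.dot(scaled_energy, prices)
    too_little_energy = np.sum(energy_use) < 10 * (0.5 * baseline_max_demand)
    return -np.log(bill) - 10 * too_little_energy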
- scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) - + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + ## ? + scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + return -np.log(np.dot(scaled_energy, self.prices)) - 10 * ( + np.sum(self.energy_use) < (10 * (0.5 * self.baseline_max_demand)) + ) # - + # 10 * ()) + # sigmoid between 10 and 20 so there's a smooth transition + # - lambd * (difference b/w energy(t)) - put a bound on + # play with the lipschitz constant + # [10, 20, 10, 10, 10, ] - return (-np.log(np.dot(scaled_energy, self.prices)) - - 10 * (np.sum(self.energy_use) < (10 * (.5 * self.baseline_max_demand)))) # - - #10 * ()) - # sigmoid between 10 and 20 so there's a smooth transition - # - lambd * (difference b/w energy(t)) - put a bound on - # play with the lipschitz constant - # [10, 20, 10, 10, 10, ] - - - - - - def neg_distance_from_ideal(self, demands): - """ + def neg_distance_from_ideal(self, demands): + """ args: demands: np.array() of demands from ideal_use_calculation() @@ -119,84 +117,87 @@ def neg_distance_from_ideal(self, demands): a numerical distance metric, negated """ - return -((demands - self.energy_use)**2).sum() + return -((demands - self.energy_use) ** 2).sum() - def cost_distance(self, ideal_demands): - """ + def cost_distance(self, ideal_demands): + """ args: demands: np.array() of demands from ideal_use_calculation() returns: a cost-based distance metric, negated """ - current_cost = np.dot(self.prices, self.energy_use) - ideal_cost = np.dot(self.prices, ideal_demands) + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) - cost_difference = ideal_cost - current_cost - - return cost_difference + cost_difference = ideal_cost - current_cost - def log_cost_distance(self, ideal_demands): - """ + return cost_difference + + def log_cost_distance(self, ideal_demands): + """ args: demands: np.array() of demands from ideal_use_calculation() returns: the log of the cost distance """ - current_cost = np.dot(self.prices, self.energy_use) - ideal_cost = np.dot(self.prices, ideal_demands) - - cost_difference = ideal_cost - current_cost - - #TODO ENSURE THAT COST DIFFERENCE IS < 0 - if cost_difference < 0: - return -np.log(-cost_difference) - else: - print("WEIRD REWARD ALERT. IDEAL COST >= CURRENT COST. returning reward of 10") - return 10 - - def scaled_cost_distance(self, ideal_demands): - """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # TODO ENSURE THAT COST DIFFERENCE IS < 0 + if cost_difference < 0: + return -np.log(-cost_difference) + else: + print( + "WEIRD REWARD ALERT. IDEAL COST >= CURRENT COST. 
returning reward of 10" + ) + return 10 + + def scaled_cost_distance(self, ideal_demands): + """ args: demands: np.array() of demands from ideal_use_calculation() returns: a cost-based distance metric normalized by total ideal cost """ - - current_cost = np.dot(self.prices, self.energy_use) - ideal_cost = np.dot(self.prices, ideal_demands) - - cost_difference = ideal_cost - current_cost - - # print("--" * 10) - # print("ideal cost") - # print(ideal_cost) - # print("--" * 10) - # print("prices") - # print(self.prices) - # print("--" * 10) - # print("current_cost") - # print(current_cost) - - if cost_difference > 0 or ideal_cost < 0: - print("--" * 10) - print("Problem with reward") - # print("min_demand: " + str(self.min_demand)) - # print("max_demand: " + str(self.max_demand)) - # print("--" * 10) - print("prices") - print(self.prices) - # print("current_cost") - # print(current_cost) - # print("--" * 10) - # print("ideal_cost") - # print(ideal_cost) - # print("ideal_demands") - # print(ideal_demands) - # print("energy demand") - # print(self.energy_use) - print("taking the neg abs value so that it stays the same sign.") - - return -np.abs(cost_difference/ideal_cost) + + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # print("--" * 10) + # print("ideal cost") + # print(ideal_cost) + # print("--" * 10) + # print("prices") + # print(self.prices) + # print("--" * 10) + # print("current_cost") + # print(current_cost) + + if cost_difference > 0 or ideal_cost < 0: + print("--" * 10) + print("Problem with reward") + # print("min_demand: " + str(self.min_demand)) + # print("max_demand: " + str(self.max_demand)) + # print("--" * 10) + print("prices") + print(self.prices) + # print("current_cost") + # print(current_cost) + # print("--" * 10) + # print("ideal_cost") + # print(ideal_cost) + # print("ideal_demands") + # print(ideal_demands) + # print("energy demand") + # print(self.energy_use) + print("taking the neg abs value so that it stays the same sign.") + + return -np.abs(cost_difference / ideal_cost) + diff --git a/multiagent/utils.py b/multiagent/utils.py index 19637d732..1cb1d8877 100644 --- a/multiagent/utils.py +++ b/multiagent/utils.py @@ -4,13 +4,14 @@ from scipy.optimize import minimize import os import matplotlib.pyplot as plt -from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import MinMaxScaler +import os -#data_path = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "building_data.csv") +# data_path = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "building_data.csv") # csv_path = os.path.dirname(os.path.realpath(__file__)) + "/building_data.csv" -def price_signal(day = 45, type_of_DR = "real_time_pricing"): +def price_signal(day=45, type_of_DR="real_time_pricing"): """ Utkarsha's work on price signal from a building with demand and solar @@ -21,7 +22,11 @@ def price_signal(day = 45, type_of_DR = "real_time_pricing"): """ csv_path = "building_data.csv" csv_path_2 = "../gym-socialgame/gym_socialgame/envs/building_data.csv" - csv_path_3 = "/global/home/users/lucas_spangher/transactive_control/gym-socialgame/gym_socialgame/envs/building_data.csv" + current_dir = os.path.dirname(os.path.realpath(__file__)) + csv_path_3 = os.path.join( + current_dir, + "../../transactive_control/gym-socialgame/gym_socialgame/envs/building_data.csv", + ) try: df = pd.read_csv(csv_path) except: @@ -30,35 +35,39 @@ def price_signal(day = 45, type_of_DR = 
"real_time_pricing"): except: df = pd.read_csv(csv_path_3) - pv = 0.001*np.array(df['PV (W)'].tolist()) - price = np.array(df['Price( $ per kWh)'].tolist()) - demand = np.nan_to_num(np.array(df['Office_Elizabeth (kWh)'].tolist())) - demand_charge = 10/30 # 10$/kW per month + pv = 0.001 * np.array(df["PV (W)"].tolist()) + price = np.array(df["Price( $ per kWh)"].tolist()) + demand = np.nan_to_num(np.array(df["Office_Elizabeth (kWh)"].tolist())) + demand_charge = 10 / 30 # 10$/kW per month - pvsize = 0 #Assumption - netdemand = demand - pvsize*pv + pvsize = 0 # Assumption + netdemand = demand - pvsize * pv # Data starts at 5 am on Jan 1 - netdemand_24 = netdemand[24*day-5:24*day+19] - price_24 = price[24*day-5:24*day+19] - pv_24 = pv[24*day-5:24*day+19] - demand_24 = demand[24*day-5:24*day+19] + netdemand_24 = netdemand[24 * day - 5 : 24 * day + 19] + price_24 = price[24 * day - 5 : 24 * day + 19] + pv_24 = pv[24 * day - 5 : 24 * day + 19] + demand_24 = demand[24 * day - 5 : 24 * day + 19] # Calculate optimal load scheduling. 90% of load is fixed, 10% is controllable. def optimise_24h(netdemand_24, price_24): - currentcost = netdemand_24@price_24 + currentcost = netdemand_24 @ price_24 + + fixed_load = 0.9 * netdemand_24 + controllable_load = sum(0.1 * netdemand_24) - fixed_load = 0.9*netdemand_24 - controllable_load = sum(0.1*netdemand_24) - def objective(x): load = fixed_load + x - cost = np.multiply(price_24,load) + cost = np.multiply(price_24, load) # Negative demand means zero cost, not negative cost # Adding L1 regularisation to penalise shifting of occupant demand lambd = 0.005 # Demand charge: attached to peak demand - return sum(np.maximum(cost,0)) + demand_charge*max(load) + lambd*sum(abs(x-0.1*netdemand_24)) + return ( + sum(np.maximum(cost, 0)) + + demand_charge * max(load) + + lambd * sum(abs(x - 0.1 * netdemand_24)) + ) def constraint_sumofx(x): return sum(x) - controllable_load @@ -68,48 +77,40 @@ def constraint_x_positive(x): x0 = np.zeros(24) cons = [ - {'type':'eq', 'fun': constraint_sumofx}, - {'type':'ineq', 'fun':constraint_x_positive} + {"type": "eq", "fun": constraint_sumofx}, + {"type": "ineq", "fun": constraint_x_positive}, ] sol = minimize(objective, x0, constraints=cons) return sol - if type_of_DR == "real_time_pricing": - sol = optimise_24h(netdemand_24,price_24) - x = sol['x'] - diff = x - 0.1*netdemand_24 + if type_of_DR == "real_time_pricing": + sol = optimise_24h(netdemand_24, price_24) + x = sol["x"] + diff = x - 0.1 * netdemand_24 return -diff - min(-diff) elif type_of_DR == "time_of_use": - if (np.mean(price_24[8:18]) == price_24[9]): - price_24[13:16]+=3 + if np.mean(price_24[8:18]) == price_24[9]: + price_24[13:16] += 3 return price_24 else: return "error!!!" 
def fourier_points_from_action(action, points_length, fourier_basis_size): - assert fourier_basis_size == (action.size + 1) // 2, "Incorrect fourier basis size for actions" - root_points = (action[0]/2)*np.ones(points_length) + assert ( + fourier_basis_size == (action.size + 1) // 2 + ), "Incorrect fourier basis size for actions" + root_points = (action[0] / 2) * np.ones(points_length) inp = np.linspace(0, 1, points_length) for k in range(1, fourier_basis_size): - ak, bk = action[2*k - 1], action[2*k] - root_points += ak * np.sin(2*np.pi*k*inp) + bk * np.cos(2*np.pi*k*inp) + ak, bk = action[2 * k - 1], action[2 * k] + root_points += ak * np.sin(2 * np.pi * k * inp) + bk * np.cos( + 2 * np.pi * k * inp + ) - points = root_points**2 - #TODO: More elegant solution than clipping + points = root_points ** 2 + # TODO: More elegant solution than clipping points = np.clip(points, 0, 10) return points - - - - - - - - - - - - From f58136f10f81dcd0e41c6181403060c34640db39 Mon Sep 17 00:00:00 2001 From: Manan Khattar Date: Tue, 15 Dec 2020 14:19:04 -0800 Subject: [PATCH 5/6] pre 12/15 work --- multiagent/agents.py | 11 ----------- multiagent/reward.py | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/multiagent/agents.py b/multiagent/agents.py index f7bfbd1fa..350ae904b 100644 --- a/multiagent/agents.py +++ b/multiagent/agents.py @@ -1,7 +1,6 @@ import pandas as pd import numpy as np from sklearn.preprocessing import MinMaxScaler -import random #### file to make the simulation of people that we can work with @@ -214,19 +213,9 @@ def get_response(self, points, day_of_week=None): elif self.response == "l": energy_resp = self.linear_response(points, day_of_week=day_of_week) elif self.response == "m": - draw = random.randint(Start=0, Stop=2) - if draw == 0: - energy_resp = self.threshold_exp_response( - points, day_of_week=day_of_week - ) - elif draw == 1: - energy_resp = self.sin_response(points, day_of_week=day_of_week) - else: - energy_resp = self.linear_response(points, day_of_week=day_of_week) energy_resp = self.mixed_response(points, day_of_week=day_of_week) else: raise NotImplementedError - return energy_resp diff --git a/multiagent/reward.py b/multiagent/reward.py index 0b09bea35..f55b80527 100644 --- a/multiagent/reward.py +++ b/multiagent/reward.py @@ -65,7 +65,7 @@ def ideal_use_calculation(self): for i in range(self._num_timesteps): constraints += [demands[i] <= max_demand] constraints += [min_demand <= demands[i]] - # if i != 0: + # if i != 0:y # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] objective = cvx.Minimize(demands.T * prices) From 17f2073c791837d7248980149167e79783772d55 Mon Sep 17 00:00:00 2001 From: Manan Khattar Date: Thu, 17 Dec 2020 02:24:17 -0800 Subject: [PATCH 6/6] 12/16 --- multiagent/SG_environment.py | 21 ++++++++++++++++++--- multiagent/agents.py | 9 +++++++++ multiagent/reward.py | 4 ++-- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py index de5e2e7dd..5d04e5886 100644 --- a/multiagent/SG_environment.py +++ b/multiagent/SG_environment.py @@ -307,6 +307,19 @@ def _simulate_human(self, action, agent): return player_energy def step(self, action_n): + for i, action in enumerate(action_n): + action = np.asarray(action) + if self.action_space_string == "continuous": + action = np.clip(action, 0, 10) + action_n[i] = action + else: + raise Exception("Wrong action_space_string") + + print("--" * 10) + print("--" * 10) + print("clipped action") + print(action_n) + obs_n = [] 
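# Editor's note (illustrative sketch, not part of the patch): the clipping added
# above maps whatever the learner emits into the [0, 10] "points" range before it
# reaches the simulated players; since the continuous space is Box(low=-1, high=1),
# the negative half of that range collapses to 0 after the clip. Standalone
# equivalent, assuming numpy as np:
raw_action = np.array([-0.4, 0.2, 1.3])      # e.g. sampled from the Box(-1, 1) space
clipped_action = np.clip(raw_action, 0, 10)  # -> array([0. , 0.2, 1.3])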
reward_n = [] done_n = [] @@ -316,7 +329,7 @@ def step(self, action_n): self.day = (self.day + 1) % 365 self.curr_iter += 1 - if self.curr_iter > 0: + if self.curr_iter > 0: ### could change done = True self.curr_iter = 0 else: @@ -325,7 +338,9 @@ def step(self, action_n): for i, agent in enumerate(self.agents.values()): agent.prev_energy = self._simulate_human(action_n[i], agent) obs_n.append(self._get_observation_per_agent(agent)) - reward_n.append(self._get_reward_per_agent(agent)) + reward_n.append( + self._get_reward_per_agent(agent, reward_function=self.reward_function) + ) done_n.append(done) info_n["n"].append( {} @@ -384,7 +399,7 @@ def _get_reward_per_agent(self, agent, reward_function="scaled_cost_distance"): def reset(self): # reset world obs_n = [] - for agent in self.agents: + for agent in self.agents.values(): obs_n.append(self._get_observation_per_agent(agent)) return obs_n diff --git a/multiagent/agents.py b/multiagent/agents.py index 350ae904b..173099157 100644 --- a/multiagent/agents.py +++ b/multiagent/agents.py @@ -1,3 +1,4 @@ +import pdb import pandas as pd import numpy as np from sklearn.preprocessing import MinMaxScaler @@ -202,6 +203,14 @@ def threshold_exp_response(self, points, day_of_week=None): def linear_response(self, points, day_of_week=None): points_effect = points * self.points_multiplier + if ( + np.isnan(points_effect).any() + or np.isneginf(points_effect).any() + or np.isposinf(points_effect).any() + ): + import pdb + + pdb.set_trace() output = self.routine_output_transform(points_effect, day_of_week=day_of_week) return output diff --git a/multiagent/reward.py b/multiagent/reward.py index f55b80527..97ee222ad 100644 --- a/multiagent/reward.py +++ b/multiagent/reward.py @@ -1,5 +1,3 @@ -import cvxpy as cvx - # import osqp import numpy as np from sklearn.preprocessing import MinMaxScaler @@ -35,6 +33,8 @@ def __init__(self, energy_use, prices, min_demand, max_demand): self.total_demand = np.sum(energy_use) def ideal_use_calculation(self): + import cvxpy as cvx + """ Computes an optimization of demand according to price
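# Editor's note (illustrative sketch, not part of the patch): ideal_use_calculation,
# whose cvxpy import is deferred into the function body in this last hunk, solves a
# small linear program -- minimize sum_t price[t]*demand[t] subject to
# sum_t demand[t] == total_demand and min_demand <= demand[t] <= max_demand -- i.e.
# it reallocates the same total consumption toward the cheapest hours to obtain the
# "ideal" demand profile that the distance-based rewards compare against. An
# equivalent sketch using scipy.optimize (assumed available) instead of cvxpy:
import numpy as np
from scipy.optimize import linprog

def ideal_use_sketch(prices, total_demand, min_demand, max_demand):
    T = len(prices)
    result = linprog(
        c=prices,                                # minimize price . demand
        A_eq=[[1.0] * T], b_eq=[total_demand],   # keep the day's total demand fixed
        bounds=[(min_demand, max_demand)] * T,   # per-hour demand bounds
        method="highs",
    )
    return np.asarray(result.x)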