From e58062903a923bb31d0240edbae6ea743747ff48 Mon Sep 17 00:00:00 2001 From: Manan Khattar Date: Tue, 8 Dec 2020 22:10:08 -0800 Subject: [PATCH 1/6] 12/8 work --- multiagent/SG_environment.py | 365 ++++++++++++++++++++++++++++ multiagent/environment.py | 127 ++++++---- multiagent/scenarios/simple_push.py | 45 +++- 3 files changed, 479 insertions(+), 58 deletions(-) create mode 100644 multiagent/SG_environment.py diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py new file mode 100644 index 000000000..f7d09d581 --- /dev/null +++ b/multiagent/SG_environment.py @@ -0,0 +1,365 @@ +import gym +from gym import spaces +from gym.envs.registration import EnvSpec +import numpy as np +from multiagent.multi_discrete import MultiDiscrete + +import numpy as np +import random + +import tensorflow as tf + +from gym_socialgame.envs.utils import price_signal, fourier_points_from_action +from gym_socialgame.envs.agents import * +from gym_socialgame.envs.reward import Reward + +import pickle + +# environment for all agents in the multiagent world +# currently code assumes that no agents will be created/destroyed at runtime! +class MultiAgentEnv(gym.Env): + metadata = { + 'render.modes' : ['human', 'rgb_array'] # ?? necc? + } + + def __init__( + self, + action_space_string = "continuous", + response_type_string = "l", + number_of_participants = 10, + one_day = 0, + energy_in_state = False, + yesterday_in_state = False, + day_of_week = False, + pricing_type="TOU", + reward_function = "scaled_cost_distance", + fourier_basis_size = 10, + ): + + # self.world = world ## does world appear elsewhere + + player_dict = self._create_agents() + self.agents = player_dict # or transform however needed # world.policy_agents + self.n = len(self.agents) + + self.discrete_action_space = False + self.discrete_action_input = False + self.force_discrete_action = False + + self.shared_reward = False ## right? 
+ self.time = 0 + + self.points_length = 10 + self.action_length = 10 + self.action_subspace = 3 + + self.action_space = [] + + + # (our inputs) + self.prev_energy = np.zeros(10) + self.one_day = self._find_one_day(one_day) + self.energy_in_state = energy_in_state + self.yesterday_in_state = yesterday_in_state + self.reward_function = reward_function + self.fourier_basis_size = fourier_basis_size + + self.day = 0 + self.days_of_week = [0, 1, 2, 3, 4] + self.day_of_week_flag = day_of_week + self.day_of_week = self.days_of_week[self.day % 5] + + #Create Observation Space (aka State Space) + self.observation_space = [] + + if pricing_type=="TOU": + self.pricing_type = "time_of_use" + elif pricing_type == "RTP": + self.pricing_type = "real_time_pricing" + else: + print("Wrong pricing type") + raise ValueError + + self.prices = self._get_prices() + + ## back to filling in their skeleton + + for agent in self.agents: + # action seq isn't continuous, so skipping Multidiscrete action + self.action_space.append(self._create_action_space()) + + obs_dim = 10 + 10 * self.energy_in_state + 10 * self.yesterday_in_state + self.observation_space.append(self._create_observation_space()) + + + def _find_one_day(self, one_day: int): + """ + Purpose: Helper function to find one_day to train on (if applicable) + + Args: + One_day: (Int) in range [-1,365] + + Returns: + 0 if one_day = 0 + one_day if one_day in range [1,365] + random_number(1,365) if one_day = -1 + """ + + print("one_day") + print(one_day) + + # if(one_day != -1): + # return np.random.randint(0, high=365) + + # else: + return one_day + + + def _create_action_space(self): + """ + Purpose: Return action space of type specified by self.action_space_string + + Args: + None + + Returns: + Action Space for environment based on action_space_str + + Note: Multidiscrete refers to a 10-dim vector where each action {0,1,2} represents Low, Medium, High points respectively. + We pose this option to test whether simplifying the action-space helps the agent. + """ + + #Making a symmetric, continuous space to help learning for continuous control (suggested in StableBaselines doc.) + if self.action_space_string == "continuous": + return spaces.Box(low=-1, high=1, shape=(self.action_length,), dtype=np.float32) + + elif self.action_space_string == "multidiscrete": + discrete_space = [self.action_subspace] * self.action_length + return spaces.MultiDiscrete(discrete_space) + + elif self.action_space_string == "fourier": + return spaces.Box( + low=-2, high=2, shape=(2*self.fourier_basis_size - 1,), dtype=np.float32 + ) + + + def _create_agents(self): + """ + Purpose: Create the participants of the social game. We create a game with n players, where n = self.number_of_participants + + Args: + None + + Returns: + agent_dict: Dictionary of players, each with response function based on self.response_type_string + + """ + + player_dict = {} + + # Sample Energy from average energy in the office (pre-treatment) from the last experiment + # Reference: Lucas Spangher, et al. Engineering vs. ambient typevisualizations: Quantifying effects of different data visualizations on energy consumption. 
2019 + + sample_energy = np.array([ 0.28, 11.9, 16.34, 16.8, 17.43, 16.15, 16.23, 15.88, 15.09, 35.6, + 123.5, 148.7, 158.49, 149.13, 159.32, 157.62, 158.8, 156.49, 147.04, 70.76, + 42.87, 23.13, 22.52, 16.8 ]) + + #only grab working hours (8am - 5pm) + working_hour_energy = sample_energy[8:18] + + my_baseline_energy = pd.DataFrame(data = {"net_energy_use" : working_hour_energy}) + + for i in range(self.number_of_participants): + player = DeterministicFunctionPerson(my_baseline_energy, points_multiplier = 10, response= self.response_type_string) #, ) + ## TODO: Deterministic Function needs to be modified to set a flag on each agent to silent + ## TODO: add a prev_energy to keep track of their yesterday's energy + + player_dict['player_{}'.format(i)] = player + + return player_dict + +def _create_observation_space(self): + """ + Purpose: Returns the observation space. + If the state space includes yesterday, then it is +10 dim for yesterday's price signal + If the state space includes energy_in_state, then it is +10 dim for yesterday's energy + + Args: + None + + Returns: + Action Space for environment based on action_space_str + """ + + #TODO: Normalize obs_space ! + if(self.yesterday_in_state): + if(self.energy_in_state): + return spaces.Box(low=-np.inf, high=np.inf, shape=(30,), dtype=np.float32) + else: + return spaces.Box(low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32) + + else: + if self.energy_in_state: + return spaces.Box(low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32) + else: + return spaces.Box(low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32) + +def _get_prices(self): + """ + Purpose: Get grid price signals for the entire year (using past data from a building in Los Angeles as reference) + + Args: + None + + Returns: Array containing 365 price signals, where array[day_number] = grid_price for day_number from 8AM - 5PM + + """ + all_prices = [] + print("--" * 10) + print(self.one_day) + print("--" * 10) + + type_of_DR = self.pricing_type + + if self.one_day != -1: + # If one_day we repeat the price signals from a fixed day + # Tweak One_Day Price Signal HERE + price = price_signal(self.one_day, type_of_DR=type_of_DR) + price = np.array(price[8:18]) + if np.mean(price)==price[2]: + price[3:6]+=.3 + price = np.maximum(0.01 * np.ones_like(price), price) + + for i in range(365): + all_prices.append(price) + else: + day = 0 + for i in range(365): + price = price_signal(day + 1, type_of_DR=type_of_DR) + price = np.array(price[8:18]) + # put a floor on the prices so we don't have negative prices + if np.mean(price)==price[2]: + price[3:6]+=.3 + price = np.maximum(0.01 * np.ones_like(price), price) + all_prices.append(price) + day += 1 + + return np.array(all_prices) + +def _simulate_human(self, action, agent): + """ + Purpose: Gets energy consumption from players given action from agent + + Args: + Action: 10-dim vector corresponding to action for each hour 8AM - 5PM + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ + + #Get players response to agent's actions + player = agent + + if (self.day_of_week_flag): + player_energy = player.get_response(action, day_of_week = self.day_of_week) + else: + player_energy = player.get_response(action, day_of_week = None) + + #Calculate energy consumption by player and in total (over the office) + + return player_energy + + +def step(self, action_n): + obs_n = [] + reward_n = [] + done_n = [] + info_n = {"n": []} + + for i, agent in 
enumerate(self.agents): + agent.action = action_n[i] + + prev_price = self.prices[(self.day)] + self.day = (self.day + 1) % 365 + self.curr_iter += 1 + + if self.curr_iter > 0: + done = True + self.curr_iter = 0 + else: + done = False + + for agent in self.agents: + obs_n.append(self._get_observation_per_agent(agent)) + reward_n.append(self._get_reward_per_agent(agent)) + done_n.append(self._get_done(agent)) + + info_n["n"].append(self._get_info(agent)) + + reward = np.sum(reward_n) + return obs_n, reward_n, done_n, info_n + +def _get_observation_per_agent(self, agent): + prev_price = self.prices[ (self.day - 1) % 365] + next_observation = self.prices[self.day] + + if(self.yesterday_in_state): + if self.energy_in_state: + return np.concatenate((next_observation, np.concatenate((prev_price, self.prev_energy)))) + else: + return np.concatenate((next_observation, prev_price)) + + elif self.energy_in_state: + return np.concatenate((next_observation, self.prev_energy)) + + else: + return next_observation + +def _get_reward_per_agent(self, agent, price, energy_consumptions, reward_function = "scaled_cost_distance"): + # TODO: finish massaging this + """ + Purpose: Compute reward given price signal and energy consumption of the office + + Args: + Price: Price signal vector (10-dim) + Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ + + total_reward = 0 + for player_name in energy_consumptions: + if player_name != "avg": + # get the points output from players + player = self.player_dict[player_name] + + # get the reward from the player's output + player_min_demand = player.get_min_demand() + player_max_demand = player.get_max_demand() + player_energy = energy_consumptions[player_name] + player_reward = Reward(player_energy, price, player_min_demand, player_max_demand) + + if reward_function == "scaled_cost_distance": + player_ideal_demands = player_reward.ideal_use_calculation() + reward = player_reward.scaled_cost_distance(player_ideal_demands) + + elif reward_function == "log_cost_regularized": + reward = player_reward.log_cost_regularized() + + total_reward += reward + + return total_reward + + def reset(self): + # reset world + obs_n = [] + for agent in self.agents: + obs_n.append(self._get_observation_per_agent(agent)) + return obs_n + + def render(self, mode='human'): + pass + + def close(self): + pass \ No newline at end of file diff --git a/multiagent/environment.py b/multiagent/environment.py index d2e8d3278..18bc3c996 100644 --- a/multiagent/environment.py +++ b/multiagent/environment.py @@ -7,13 +7,18 @@ # environment for all agents in the multiagent world # currently code assumes that no agents will be created/destroyed at runtime! 
class MultiAgentEnv(gym.Env): - metadata = { - 'render.modes' : ['human', 'rgb_array'] - } - - def __init__(self, world, reset_callback=None, reward_callback=None, - observation_callback=None, info_callback=None, - done_callback=None, shared_viewer=True): + metadata = {"render.modes": ["human", "rgb_array"]} + + def __init__( + self, + world, + reset_callback=None, + reward_callback=None, + observation_callback=None, + info_callback=None, + done_callback=None, + shared_viewer=True, + ): self.world = world self.agents = self.world.policy_agents @@ -30,9 +35,13 @@ def __init__(self, world, reset_callback=None, reward_callback=None, # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector self.discrete_action_input = False # if true, even the action is continuous, action will be performed discretely - self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False + self.force_discrete_action = ( + world.discrete_action if hasattr(world, "discrete_action") else False + ) # if true, every agent has the same reward - self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False + self.shared_reward = ( + world.collaborative if hasattr(world, "collaborative") else False + ) self.time = 0 # configure spaces @@ -44,21 +53,35 @@ def __init__(self, world, reset_callback=None, reward_callback=None, if self.discrete_action_space: u_action_space = spaces.Discrete(world.dim_p * 2 + 1) else: - u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32) + u_action_space = spaces.Box( + low=-agent.u_range, + high=+agent.u_range, + shape=(world.dim_p,), + dtype=np.float32, + ) if agent.movable: total_action_space.append(u_action_space) # communication action space if self.discrete_action_space: c_action_space = spaces.Discrete(world.dim_c) else: - c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32) + c_action_space = spaces.Box( + low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32 + ) if not agent.silent: total_action_space.append(c_action_space) # total action space if len(total_action_space) > 1: # all action spaces are discrete, so simplify to MultiDiscrete action space - if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): - act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space]) + if all( + [ + isinstance(act_space, spaces.Discrete) + for act_space in total_action_space + ] + ): + act_space = MultiDiscrete( + [[0, act_space.n - 1] for act_space in total_action_space] + ) else: act_space = spaces.Tuple(total_action_space) self.action_space.append(act_space) @@ -66,7 +89,11 @@ def __init__(self, world, reset_callback=None, reward_callback=None, self.action_space.append(total_action_space[0]) # observation space obs_dim = len(observation_callback(agent, self.world)) - self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)) + self.observation_space.append( + spaces.Box( + low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32 + ) + ) agent.action.c = np.zeros(self.world.dim_c) # rendering @@ -81,7 +108,7 @@ def step(self, action_n): obs_n = [] reward_n = [] done_n = [] - info_n = {'n': []} + info_n = {"n": []} self.agents = self.world.policy_agents # set action for each agent for i, agent in enumerate(self.agents): @@ -94,7 +121,7 @@ def step(self, action_n): reward_n.append(self._get_reward(agent)) 
done_n.append(self._get_done(agent)) - info_n['n'].append(self._get_info(agent)) + info_n["n"].append(self._get_info(agent)) # all agents get total reward in cooperative case reward = np.sum(reward_n) @@ -150,7 +177,7 @@ def _set_action(self, action, agent, action_space, time=None): size = action_space.high - action_space.low + 1 index = 0 for s in size: - act.append(action[index:(index+s)]) + act.append(action[index : (index + s)]) index += s action = act else: @@ -161,10 +188,14 @@ def _set_action(self, action, agent, action_space, time=None): if self.discrete_action_input: agent.action.u = np.zeros(self.world.dim_p) # process discrete action - if action[0] == 1: agent.action.u[0] = -1.0 - if action[0] == 2: agent.action.u[0] = +1.0 - if action[0] == 3: agent.action.u[1] = -1.0 - if action[0] == 4: agent.action.u[1] = +1.0 + if action[0] == 1: + agent.action.u[0] = -1.0 + if action[0] == 2: + agent.action.u[0] = +1.0 + if action[0] == 3: + agent.action.u[1] = -1.0 + if action[0] == 4: + agent.action.u[1] = +1.0 else: if self.force_discrete_action: d = np.argmax(action[0]) @@ -197,40 +228,43 @@ def _reset_render(self): self.render_geoms_xform = None # render environment - def render(self, mode='human'): - if mode == 'human': - alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - message = '' + def render(self, mode="human"): + if mode == "human": + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + message = "" for agent in self.world.agents: comm = [] for other in self.world.agents: - if other is agent: continue + if other is agent: + continue if np.all(other.state.c == 0): - word = '_' + word = "_" else: word = alphabet[np.argmax(other.state.c)] - message += (other.name + ' to ' + agent.name + ': ' + word + ' ') + message += other.name + " to " + agent.name + ": " + word + " " print(message) for i in range(len(self.viewers)): # create viewers (if necessary) if self.viewers[i] is None: # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering + # from gym.envs.classic_control import rendering from multiagent import rendering - self.viewers[i] = rendering.Viewer(700,700) + + self.viewers[i] = rendering.Viewer(700, 700) # create rendering geometry if self.render_geoms is None: # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering + # from gym.envs.classic_control import rendering from multiagent import rendering + self.render_geoms = [] self.render_geoms_xform = [] for entity in self.world.entities: geom = rendering.make_circle(entity.size) xform = rendering.Transform() - if 'agent' in entity.name: + if "agent" in entity.name: geom.set_color(*entity.color, alpha=0.5) else: geom.set_color(*entity.color) @@ -247,49 +281,52 @@ def render(self, mode='human'): results = [] for i in range(len(self.viewers)): from multiagent import rendering + # update bounds to center around agent cam_range = 1 if self.shared_viewer: pos = np.zeros(self.world.dim_p) else: pos = self.agents[i].state.p_pos - self.viewers[i].set_bounds(pos[0]-cam_range,pos[0]+cam_range,pos[1]-cam_range,pos[1]+cam_range) + self.viewers[i].set_bounds( + pos[0] - cam_range, + pos[0] + cam_range, + pos[1] - cam_range, + pos[1] + cam_range, + ) # update geometry positions for e, entity in enumerate(self.world.entities): self.render_geoms_xform[e].set_translation(*entity.state.p_pos) # render to display or array - results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) + 
results.append(self.viewers[i].render(return_rgb_array=mode == "rgb_array")) return results # create receptor field locations in local coordinate frame def _make_receptor_locations(self, agent): - receptor_type = 'polar' + receptor_type = "polar" range_min = 0.05 * 2.0 range_max = 1.00 dx = [] # circular receptive field - if receptor_type == 'polar': + if receptor_type == "polar": for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False): for distance in np.linspace(range_min, range_max, 3): dx.append(distance * np.array([np.cos(angle), np.sin(angle)])) # add origin dx.append(np.array([0.0, 0.0])) # grid receptive field - if receptor_type == 'grid': + if receptor_type == "grid": for x in np.linspace(-range_max, +range_max, 5): for y in np.linspace(-range_max, +range_max, 5): - dx.append(np.array([x,y])) + dx.append(np.array([x, y])) return dx # vectorized wrapper for a batch of multi-agent environments # assumes all environments have the same observation and action space class BatchMultiAgentEnv(gym.Env): - metadata = { - 'runtime.vectorized': True, - 'render.modes' : ['human', 'rgb_array'] - } + metadata = {"runtime.vectorized": True, "render.modes": ["human", "rgb_array"]} def __init__(self, env_batch): self.env_batch = env_batch @@ -310,10 +347,10 @@ def step(self, action_n, time): obs_n = [] reward_n = [] done_n = [] - info_n = {'n': []} + info_n = {"n": []} i = 0 for env in self.env_batch: - obs, reward, done, _ = env.step(action_n[i:(i+env.n)], time) + obs, reward, done, _ = env.step(action_n[i : (i + env.n)], time) i += env.n obs_n += obs # reward = [r / len(self.env_batch) for r in reward] @@ -328,7 +365,7 @@ def reset(self): return obs_n # render environment - def render(self, mode='human', close=True): + def render(self, mode="human", close=True): results_n = [] for env in self.env_batch: results_n += env.render(mode, close) diff --git a/multiagent/scenarios/simple_push.py b/multiagent/scenarios/simple_push.py index 907fef5b5..733535860 100644 --- a/multiagent/scenarios/simple_push.py +++ b/multiagent/scenarios/simple_push.py @@ -2,18 +2,19 @@ from multiagent.core import World, Agent, Landmark from multiagent.scenario import BaseScenario + class Scenario(BaseScenario): def make_world(self): world = World() # set any world properties first world.dim_c = 2 - num_agents = 2 + num_agents = 2 # same as dim_c num_adversaries = 1 num_landmarks = 2 # add agents world.agents = [Agent() for i in range(num_agents)] for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i + agent.name = "agent %d" % i agent.collide = True agent.silent = True if i < num_adversaries: @@ -23,7 +24,7 @@ def make_world(self): # add landmarks world.landmarks = [Landmark() for i in range(num_landmarks)] for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i + landmark.name = "landmark %d" % i landmark.collide = False landmark.movable = False # make initial conditions @@ -57,7 +58,11 @@ def reset_world(self, world): def reward(self, agent, world): # Agents are rewarded based on minimum agent distance to each landmark - return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) + return ( + self.adversary_reward(agent, world) + if agent.adversary + else self.agent_reward(agent, world) + ) def agent_reward(self, agent, world): # the distance to the goal @@ -65,14 +70,20 @@ def agent_reward(self, agent, world): def adversary_reward(self, agent, world): # keep the nearest good agents away from the goal - agent_dist = 
[np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary] + agent_dist = [ + np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) + for a in world.agents + if not a.adversary + ] pos_rew = min(agent_dist) - #nearest_agent = world.good_agents[np.argmin(agent_dist)] - #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) - neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos))) - #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) + # nearest_agent = world.good_agents[np.argmin(agent_dist)] + # neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) + neg_rew = np.sqrt( + np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos)) + ) + # neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) return pos_rew - neg_rew - + def observation(self, agent, world): # get positions of all entities in this agent's reference frame entity_pos = [] @@ -86,11 +97,19 @@ def observation(self, agent, world): comm = [] other_pos = [] for other in world.agents: - if other is agent: continue + if other is agent: + continue comm.append(other.state.c) other_pos.append(other.state.p_pos - agent.state.p_pos) if not agent.adversary: - return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos) + return np.concatenate( + [agent.state.p_vel] + + [agent.goal_a.state.p_pos - agent.state.p_pos] + + [agent.color] + + entity_pos + + entity_color + + other_pos + ) else: - #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network + # other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network return np.concatenate([agent.state.p_vel] + entity_pos + other_pos) From 2d36a90cb41cfc23da2d86fcd433e8df59fdca66 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 9 Dec 2020 01:10:08 -0800 Subject: [PATCH 2/6] modified obs and rew funcs agent-wise, cleaned up --- multiagent/SG_environment.py | 73 ++++---- multiagent/agents.py | 328 +++++++++++++++++++++++++++++++++++ multiagent/reward.py | 202 +++++++++++++++++++++ multiagent/utils.py | 115 ++++++++++++ 4 files changed, 674 insertions(+), 44 deletions(-) create mode 100644 multiagent/agents.py create mode 100644 multiagent/reward.py create mode 100644 multiagent/utils.py diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py index f7d09d581..a8d010b2f 100644 --- a/multiagent/SG_environment.py +++ b/multiagent/SG_environment.py @@ -171,9 +171,6 @@ def _create_agents(self): for i in range(self.number_of_participants): player = DeterministicFunctionPerson(my_baseline_energy, points_multiplier = 10, response= self.response_type_string) #, ) - ## TODO: Deterministic Function needs to be modified to set a flag on each agent to silent - ## TODO: add a prev_energy to keep track of their yesterday's energy - player_dict['player_{}'.format(i)] = player return player_dict @@ -275,28 +272,24 @@ def step(self, action_n): reward_n = [] done_n = [] info_n = {"n": []} - - for i, agent in enumerate(self.agents): - agent.action = action_n[i] prev_price = self.prices[(self.day)] - self.day = (self.day + 1) % 365 - self.curr_iter += 1 - - if self.curr_iter > 0: - done = True - 
self.curr_iter = 0
-        else:
-            done = False
-
-        for agent in self.agents:
-            obs_n.append(self._get_observation_per_agent(agent))
-            reward_n.append(self._get_reward_per_agent(agent))
-            done_n.append(self._get_done(agent))
+        self.day = (self.day + 1) % 365
+        self.curr_iter += 1
+
+        if self.curr_iter > 0:
+            done = True
+            self.curr_iter = 0
+        else:
+            done = False
 
-            info_n["n"].append(self._get_info(agent))
+        for i, agent in enumerate(self.agents):
+            agent.prev_energy = self._simulate_human(action_n[i], agent)
+            obs_n.append(self._get_observation_per_agent(agent))
+            reward_n.append(self._get_reward_per_agent(agent))
+            done_n.append(done)
+            info_n["n"].append({})  ## kept as a formality, mirroring the Stable Baselines interface
 
-        reward = np.sum(reward_n)
         return obs_n, reward_n, done_n, info_n
 
@@ -305,49 +298,41 @@ def _get_observation_per_agent(self, agent):
     prev_price = self.prices[ (self.day - 1) % 365]
     next_observation = self.prices[self.day]
 
     if(self.yesterday_in_state):
         if self.energy_in_state:
-            return np.concatenate((next_observation, np.concatenate((prev_price, self.prev_energy))))
+            return np.concatenate((next_observation, np.concatenate((prev_price, agent.prev_energy))))
         else:
             return np.concatenate((next_observation, prev_price))
 
     elif self.energy_in_state:
-        return np.concatenate((next_observation, self.prev_energy))
+        return np.concatenate((next_observation, agent.prev_energy))
 
     else:
         return next_observation
 
-def _get_reward_per_agent(self, agent, price, energy_consumptions, reward_function = "scaled_cost_distance"):
-    # TODO: finish massaging this
+def _get_reward_per_agent(self, agent, reward_function = "scaled_cost_distance"):
     """
     Purpose: Compute reward given price signal and energy consumption of the office
 
     Args:
-        Price: Price signal vector (10-dim)
+        agent: DeterministicFunctionPerson object, a simulated office worker
         Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage
 
     Returns:
-        Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg")
+        reward: scalar reward for this agent, computed from its energy use and the current price signal
     """
 
-    total_reward = 0
-    for player_name in energy_consumptions:
-        if player_name != "avg":
-            # get the points output from players
-            player = self.player_dict[player_name]
-
-            # get the reward from the player's output
-            player_min_demand = player.get_min_demand()
-            player_max_demand = player.get_max_demand()
-            player_energy = energy_consumptions[player_name]
-            player_reward = Reward(player_energy, price, player_min_demand, player_max_demand)
-
-            if reward_function == "scaled_cost_distance":
-                player_ideal_demands = player_reward.ideal_use_calculation()
-                reward = player_reward.scaled_cost_distance(player_ideal_demands)
+    price = self.prices[self.day]
+    # get the reward from the player's output
+    player_min_demand = agent.get_min_demand()
+    player_max_demand = agent.get_max_demand()
+    player_energy = agent.prev_energy
+    player_reward = Reward(player_energy, price, player_min_demand, player_max_demand)
 
-            elif reward_function == "log_cost_regularized":
-                reward = player_reward.log_cost_regularized()
+    if reward_function == "scaled_cost_distance":
+        player_ideal_demands = player_reward.ideal_use_calculation()
+        reward = player_reward.scaled_cost_distance(player_ideal_demands)
 
-            total_reward += reward
+    elif reward_function == "log_cost_regularized":
+        reward = player_reward.log_cost_regularized()
 
-    return total_reward
+    return reward
diff --git a/multiagent/agents.py b/multiagent/agents.py
new file mode 100644
index 000000000..84be92bd9
--- /dev/null
+++ b/multiagent/agents.py
@@ -0,0 +1,328 @@
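+# agents.py: simulated office occupants for the social game.
+# Each Person subclass below maps a 10-dim points/price signal (working hours, 8AM - 5PM)
+# to a 10-dim energy-consumption vector bounded by that person's min/max demand.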
+import pandas as pd +import numpy as np +import cvxpy as cvx +from sklearn.preprocessing import MinMaxScaler + +#### file to make the simulation of people that we can work with + + +class Person(): + """ Person (parent?) class -- will define how the person takes in a points signal and puts out an energy signal + baseline_energy = a list or dataframe of values. This is data from SinBerBEST + points_multiplier = an int which describes how sensitive each person is to points + + """ + + def __init__(self, baseline_energy_df, points_multiplier = 1): + self.baseline_energy_df = baseline_energy_df + self.baseline_energy = np.array(self.baseline_energy_df["net_energy_use"]) + self.points_multiplier = points_multiplier + + baseline_min = self.baseline_energy.min() + baseline_max = self.baseline_energy.max() + baseline_range = baseline_max - baseline_min + + self.min_demand = np.maximum(0, baseline_min + baseline_range * .05) + self.max_demand = np.maximum(0, baseline_min + baseline_range * .95) + self.silent = True + self.prev_energy = self.baseline_energy # basically a placeholder. + + + def energy_output_simple_linear(self, points): + """Determines the energy output of the person, based on the formula: + + y[n] = -sum_{rolling window of 5} points + baseline_energy + noise + + inputs: points - list or dataframe of points values. Assumes that the + list will be in the same time increment that energy_output will be. + + For now, that's in 1 hour increments + + """ + points_df = pd.DataFrame(points) + + points_effect = ( + points_df + .rolling( + window = 5, + min_periods = 1) + .mean() + ) + + + + time = points_effect.shape[0] + energy_output= [] + + for t in range(time): + temp_energy = self.baseline_energy[t] - points_effect.iloc[t]*self.points_multiplier + \ + np.random.normal(1) + energy_output.append(temp_energy) + + return pd.DataFrame(energy_output) + + def pure_linear_signal(self, points, baseline_day=0): + """ + A linear person. The more points you give them, the less energy they will use + (within some bounds) for each hour. No rolling effects or anything. The simplest + signal. 
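+        Example (illustrative numbers): with points_multiplier = 1 and
+        points = np.ones(10), each working hour's baseline load is reduced
+        by 1 unit, then clipped to [min_demand, max_demand].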
+ """ + + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + return output + + + + def get_min_demand(self): + return self.min_demand + # return np.quantile(self.baseline_energy, .05) + + def get_max_demand(self): + return self.max_demand + # return np.quantile(self.baseline_energy, .95) + +class FixedDemandPerson(Person): + + def __init__(self, baseline_energy_df, points_multiplier = 1): + super().__init__(baseline_energy_df, points_multiplier) + + + def demand_from_points(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + total_demand = np.sum(output) + + + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + + return output + + def adverserial_linear(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + total_demand = np.sum(output) + + + points_effect = np.array(points * self.points_multiplier) + output = output + points_effect + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + + return output + +class DeterministicFunctionPerson(Person): + + def __init__(self, baseline_energy_df, points_multiplier = 1, response = 't'): + super().__init__(baseline_energy_df, points_multiplier) + self.response = response + + def threshold_response_func(self, points): + points = np.array(points) * self.points_multiplier + threshold = np.mean(points) + return [p if p>threshold else 0 for p in points] + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p**2 for p in points] + + return points_effect + + def sin_response_func(self,points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i)*np.pi)*self.points_multiplier for i in points] + points = points + return points + + def linear_response_func(self,points): + return points * self.points_multiplier + + def routine_output_transform(self, points_effect, baseline_day=0, day_of_week = None): + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + total_demand = np.sum(output) + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output - points_effect + # output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + # output = np.maximum(output, self.min_demand) + # output = np.minimum(output, self.max_demand) + # return output + + + if(day_of_week != None): + energy_resp = energy_resp * self.day_of_week_multiplier[day_of_week] + + 
scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) + scaled_output = scaler.fit_transform(output.reshape(-1, 1)) + + return np.squeeze(scaled_output) + + + def threshold_response(self, points, day_of_week = None): + points_effect = self.threshold_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def sin_response(self, points, day_of_week = None): + points_effect = self.sin_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def exp_response(self, points, day_of_week = None): + points_effect = self.exponential_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def threshold_exp_response(self, points, day_of_week = None): + points_effect = self.exponential_response_func(points) + points_effect = self.threshold_response_func(points_effect) + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def linear_response(self, points, day_of_week = None): + points_effect = points*self.points_multiplier + output = self.routine_output_transform(points_effect, day_of_week = day_of_week) + return output + + def get_response(self, points, day_of_week=None): + if(self.response == 't'): + energy_resp = self.threshold_exp_response(points, day_of_week = day_of_week) + elif(self.response == 's'): + energy_resp = self.sin_response(points, day_of_week = day_of_week) + elif(self.response == 'l'): + energy_resp = self.linear_response(points, day_of_week = day_of_week) + else: + raise NotImplementedError + + return energy_resp + +class RandomizedFunctionPerson(DeterministicFunctionPerson): + def __init__(self, baseline_energy_df, points_multiplier=1, response='t', low = 0, high = 50, distr = 'U'): + + """ + Adds Random Noise to DeterministicFunctionPerson energy output (for D.R. purposes) + + New Args: + Low = Lower bound for random noise added to energy use + High = Upper bound " " " " " " + Distr = 'G' for Gaussian noise, 'U' for Uniform random noise (Note: Continuous distr.) + + Note: For design purposes the random noise is updated at the end of each episode + """ + #TODO: Multivariate distr?? 
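+        # As noted above, self.noise is drawn via update_noise() and is intended to be
+        # refreshed at the end of each episode, so the added noise stays fixed within an episode.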
+ + super().__init__(baseline_energy_df, points_multiplier=points_multiplier, response=response) + + distr = distr.upper() + assert distr in ['G', 'U'] + + self.response = response + self.low = low + self.high = high if high < self.max_demand else 50 + self.distr = distr + + self.noise = [] + self.update_noise() + + def update_noise(self): + if(self.distr == 'G'): + #TODO: Update how to sample from Gausian + self.noise = np.random.normal(loc = (self.low + self.high) / 2, scale = 10, size = 10) + + elif(self.distr == 'U'): + self.noise = np.random.uniform(low = self.low, high=self.high, size = 10) + + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p**2 for p in points] + + return points_effect + self.noise + + def sin_response_func(self,points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i)*np.pi)*self.points_multiplier for i in points] + points = points + return points + self.noise + + def linear_response_func(self,points): + return points * self.points_multiplier + self.noise + + +# utkarsha's person + +class CurtailandShiftPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier = 1): + super().__init__(baseline_energy_df, points_multiplier) + self.shiftableLoadFraction = 0.1 + self.shiftByHours = 3 + self.curtailableLoadFraction = 0.1 + self.maxCurtailHours = 3 #Person willing to curtail for no more than these hours + + def shiftedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + points = np.array(points) * self.points_multiplier + shiftableLoad = self.shiftableLoadFraction*output + shiftByHours = self.shiftByHours + + # 10 hour day. Rearrange the sum of shiftableLoad into these hours by treating points as the 'price' at that hour + # Load can be shifted by a max of shiftByHours (default = 3 hours) + # For each hour, calculate the optimal hour to shift load to within +- 3 hours + shiftedLoad = np.zeros(10) + for hour in range(10): + candidatePrices = points[max(hour-shiftByHours,0): min(hour+shiftByHours,9)+1] + shiftToHour = max(hour-shiftByHours,0) + np.argmin(candidatePrices) + shiftedLoad[shiftToHour] += shiftableLoad[hour] + return shiftedLoad + + def curtailedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + points = np.array(points) * self.points_multiplier + curtailableLoad = self.curtailableLoadFraction*output + maxPriceHours = np.argsort(points)[0:self.maxCurtailHours] + for hour in maxPriceHours: + curtailableLoad[hour] = 0 + return curtailableLoad + + def get_response(self, points, day_of_week=None): + baseline_day = 0 + output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] + energy_resp = output*(1 - self.curtailableLoadFraction - self.shiftableLoadFraction) + self.curtailedLoad(points) + self.shiftedLoad(points) + + + self.min_demand = np.maximum(0, min(energy_resp)) + self.max_demand = np.maximum(0, max(energy_resp)) + + return energy_resp + + + diff --git a/multiagent/reward.py b/multiagent/reward.py new file mode 100644 index 000000000..d0dd724d6 --- /dev/null +++ b/multiagent/reward.py @@ -0,0 +1,202 @@ +import cvxpy as cvx +# import osqp +import numpy as np +from sklearn.preprocessing import MinMaxScaler + +#### file to calculate the rewards. 
Meant to be modular: +#### class Rewards should have several different functions by Dec 2019 + + +class Reward(): + def __init__(self, energy_use, prices, min_demand, max_demand): + """ + Args: + energy_use: list returned by Person class signifying energy use + prices: list returned by grid signifying cost throughout day + min_demand: value computed by Person class signifying minimum energy use long term + max_demand: value computed by Person class signifying maximum energy use long term + """ + + self.energy_use = np.array(energy_use) + self.prices = np.array(prices) + self._num_timesteps = energy_use.shape[0] + self.min_demand = np.min(energy_use) # min_demand + self.max_demand = np.max(energy_use) # max_demand + self.baseline_max_demand = 159.32 + + assert round(self.max_demand) == round(max_demand), "The max demand that the player is using and the optimization is using is not the same" + assert round(self.min_demand) == round(min_demand), "The min demand that the player is using and the optimization is using is not the same" + + self.total_demand = np.sum(energy_use) + + def ideal_use_calculation(self): + """ + Computes an optimization of demand according to price + + returns: np.array of ideal energy demands given a price signal + """ + + demands = cvx.Variable(self._num_timesteps) + min_demand = cvx.Parameter() + max_demand = cvx.Parameter() + total_demand = cvx.Parameter() + prices = cvx.Parameter(self._num_timesteps) + + min_demand = self.min_demand + max_demand = self.max_demand + total_demand = self.total_demand + + while (max_demand * 10 < total_demand): + print("multiplying demand to make optimization work") + print("max_demand: " + str(max_demand)) + print("total_demand: " + str(total_demand)) + print("energy_use") + print(self.energy_use) + max_demand *= 1.1 + + prices = self.prices + constraints = [cvx.sum(demands, axis=0, keepdims=True) == total_demand] + # constraints = [np.ones(self._num_timesteps).T * demands == total_demand] + for i in range(self._num_timesteps): + constraints += [demands[i] <= max_demand] + constraints += [min_demand <= demands[i]] + # if i != 0: + # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] + + + objective = cvx.Minimize(demands.T * prices) + problem = cvx.Problem(objective, constraints) + + problem.solve(solver = cvx.ECOS, verbose=False) + return np.array(demands.value) + + + def log_cost(self): + """ + Scales energy_use to be between min and max energy demands (this is repeated + in agent.routine_output_trasform), and then returns the simple total cost. + + """ + + scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) + scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + + return -np.log(np.dot(scaled_energy, self.prices)) + + def log_cost_regularized(self): + """ + Scales energy_use to be between min and max energy demands (this is repeated + in agent.routine_output_trasform), and then returns the simple total cost. + + :param: h - the hyperparameter that modifies the penalty on energy demand that's driven too low. + + """ + + scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) + ## ? 
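+        # Rescales raw energy use into [min_demand, max_demand] before pricing it,
+        # mirroring the scaling already applied in the agent's routine_output_transform.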
+ scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + + + + return (-np.log(np.dot(scaled_energy, self.prices)) - + 10 * (np.sum(self.energy_use) < (10 * (.5 * self.baseline_max_demand)))) # - + #10 * ()) + # sigmoid between 10 and 20 so there's a smooth transition + # - lambd * (difference b/w energy(t)) - put a bound on + # play with the lipschitz constant + # [10, 20, 10, 10, 10, ] + + + + + + def neg_distance_from_ideal(self, demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a numerical distance metric, negated + """ + + return -((demands - self.energy_use)**2).sum() + + def cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a cost-based distance metric, negated + """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + return cost_difference + + def log_cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + the log of the cost distance + """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + #TODO ENSURE THAT COST DIFFERENCE IS < 0 + if cost_difference < 0: + return -np.log(-cost_difference) + else: + print("WEIRD REWARD ALERT. IDEAL COST >= CURRENT COST. returning reward of 10") + return 10 + + def scaled_cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a cost-based distance metric normalized by total ideal cost + """ + + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # print("--" * 10) + # print("ideal cost") + # print(ideal_cost) + # print("--" * 10) + # print("prices") + # print(self.prices) + # print("--" * 10) + # print("current_cost") + # print(current_cost) + + if cost_difference > 0 or ideal_cost < 0: + print("--" * 10) + print("Problem with reward") + # print("min_demand: " + str(self.min_demand)) + # print("max_demand: " + str(self.max_demand)) + # print("--" * 10) + print("prices") + print(self.prices) + # print("current_cost") + # print(current_cost) + # print("--" * 10) + # print("ideal_cost") + # print(ideal_cost) + # print("ideal_demands") + # print(ideal_demands) + # print("energy demand") + # print(self.energy_use) + print("taking the neg abs value so that it stays the same sign.") + + return -np.abs(cost_difference/ideal_cost) diff --git a/multiagent/utils.py b/multiagent/utils.py new file mode 100644 index 000000000..19637d732 --- /dev/null +++ b/multiagent/utils.py @@ -0,0 +1,115 @@ +import csv +import numpy as np +import pandas as pd +from scipy.optimize import minimize +import os +import matplotlib.pyplot as plt +from sklearn.preprocessing import MinMaxScaler + +#data_path = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "building_data.csv") +# csv_path = os.path.dirname(os.path.realpath(__file__)) + "/building_data.csv" + + +def price_signal(day = 45, type_of_DR = "real_time_pricing"): + + """ + Utkarsha's work on price signal from a building with demand and solar + Input: Day = an int signifying a 24 hour period. 365 total, all of 2012, start at 1. 
+ Output: netdemand_price, a measure of how expensive energy is at each time in the day + optionally, we can return the optimized demand, which is the building + calculating where the net demand should be allocated + """ + csv_path = "building_data.csv" + csv_path_2 = "../gym-socialgame/gym_socialgame/envs/building_data.csv" + csv_path_3 = "/global/home/users/lucas_spangher/transactive_control/gym-socialgame/gym_socialgame/envs/building_data.csv" + try: + df = pd.read_csv(csv_path) + except: + try: + df = pd.read_csv(csv_path_2) + except: + df = pd.read_csv(csv_path_3) + + pv = 0.001*np.array(df['PV (W)'].tolist()) + price = np.array(df['Price( $ per kWh)'].tolist()) + demand = np.nan_to_num(np.array(df['Office_Elizabeth (kWh)'].tolist())) + demand_charge = 10/30 # 10$/kW per month + + pvsize = 0 #Assumption + netdemand = demand - pvsize*pv + + # Data starts at 5 am on Jan 1 + netdemand_24 = netdemand[24*day-5:24*day+19] + price_24 = price[24*day-5:24*day+19] + pv_24 = pv[24*day-5:24*day+19] + demand_24 = demand[24*day-5:24*day+19] + + # Calculate optimal load scheduling. 90% of load is fixed, 10% is controllable. + def optimise_24h(netdemand_24, price_24): + currentcost = netdemand_24@price_24 + + fixed_load = 0.9*netdemand_24 + controllable_load = sum(0.1*netdemand_24) + + def objective(x): + load = fixed_load + x + cost = np.multiply(price_24,load) + # Negative demand means zero cost, not negative cost + # Adding L1 regularisation to penalise shifting of occupant demand + lambd = 0.005 + # Demand charge: attached to peak demand + return sum(np.maximum(cost,0)) + demand_charge*max(load) + lambd*sum(abs(x-0.1*netdemand_24)) + + def constraint_sumofx(x): + return sum(x) - controllable_load + + def constraint_x_positive(x): + return x + + x0 = np.zeros(24) + cons = [ + {'type':'eq', 'fun': constraint_sumofx}, + {'type':'ineq', 'fun':constraint_x_positive} + ] + sol = minimize(objective, x0, constraints=cons) + return sol + + if type_of_DR == "real_time_pricing": + sol = optimise_24h(netdemand_24,price_24) + x = sol['x'] + diff = x - 0.1*netdemand_24 + return -diff - min(-diff) + + elif type_of_DR == "time_of_use": + if (np.mean(price_24[8:18]) == price_24[9]): + price_24[13:16]+=3 + return price_24 + else: + return "error!!!" 
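+
+# Shape of the Fourier action parameterization implemented below (illustrative sketch,
+# assuming fourier_basis_size = 3, i.e. a 5-dim action a = [a0, a1, b1, a2, b2]):
+#   root(t)   = a0/2 + a1*sin(2*pi*t) + b1*cos(2*pi*t) + a2*sin(4*pi*t) + b2*cos(4*pi*t)
+#   points(t) = clip(root(t)**2, 0, 10), with t on np.linspace(0, 1, points_length)
+# Squaring keeps the points non-negative; clipping keeps them within the 0-10 range.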
+ + +def fourier_points_from_action(action, points_length, fourier_basis_size): + assert fourier_basis_size == (action.size + 1) // 2, "Incorrect fourier basis size for actions" + root_points = (action[0]/2)*np.ones(points_length) + inp = np.linspace(0, 1, points_length) + for k in range(1, fourier_basis_size): + ak, bk = action[2*k - 1], action[2*k] + root_points += ak * np.sin(2*np.pi*k*inp) + bk * np.cos(2*np.pi*k*inp) + + points = root_points**2 + #TODO: More elegant solution than clipping + points = np.clip(points, 0, 10) + return points + + + + + + + + + + + + + From 3b7d558f3392e38e3ff50534a7c26b16720e41ee Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 9 Dec 2020 23:39:20 -0800 Subject: [PATCH 3/6] modifying imports --- multiagent/SG_environment.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py index a8d010b2f..3a9ab8b37 100644 --- a/multiagent/SG_environment.py +++ b/multiagent/SG_environment.py @@ -9,9 +9,10 @@ import tensorflow as tf -from gym_socialgame.envs.utils import price_signal, fourier_points_from_action -from gym_socialgame.envs.agents import * -from gym_socialgame.envs.reward import Reward +# manan these are in the same directory -- you may need to add that to the path, but prob not +from utils import price_signal, fourier_points_from_action +from agents import * +from reward import Reward import pickle From aa55ae628833c21654a591d1ca9019e392fe1158 Mon Sep 17 00:00:00 2001 From: Manan Khattar Date: Fri, 11 Dec 2020 17:22:52 -0800 Subject: [PATCH 4/6] 12/11 changes --- multiagent/SG_environment.py | 414 +++++++++++++++------------ multiagent/agents.py | 528 ++++++++++++++++++----------------- multiagent/reward.py | 271 +++++++++--------- multiagent/utils.py | 93 +++--- 4 files changed, 691 insertions(+), 615 deletions(-) diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py index 3a9ab8b37..de5e2e7dd 100644 --- a/multiagent/SG_environment.py +++ b/multiagent/SG_environment.py @@ -2,99 +2,102 @@ from gym import spaces from gym.envs.registration import EnvSpec import numpy as np +import pandas as pd from multiagent.multi_discrete import MultiDiscrete -import numpy as np + import random import tensorflow as tf -# manan these are in the same directory -- you may need to add that to the path, but prob not -from utils import price_signal, fourier_points_from_action -from agents import * -from reward import Reward +# manan these are in the same directory -- you may need to add that to the path, but prob not +from multiagent.utils import price_signal, fourier_points_from_action +from multiagent.agents import DeterministicFunctionPerson +from multiagent.reward import Reward import pickle # environment for all agents in the multiagent world # currently code assumes that no agents will be created/destroyed at runtime! class MultiAgentEnv(gym.Env): - metadata = { - 'render.modes' : ['human', 'rgb_array'] # ?? necc? - } + metadata = {"render.modes": ["human", "rgb_array"]} # ?? necc? 
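+    # "render.modes" follows the standard gym.Env metadata convention; rendering itself
+    # is a no-op for this environment (render() just passes).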
def __init__( - self, - action_space_string = "continuous", - response_type_string = "l", - number_of_participants = 10, - one_day = 0, - energy_in_state = False, - yesterday_in_state = False, - day_of_week = False, + self, + action_space_string="continuous", + response_type_string="l", + number_of_participants=10, + one_day=0, + energy_in_state=False, + yesterday_in_state=False, + day_of_week=False, pricing_type="TOU", - reward_function = "scaled_cost_distance", - fourier_basis_size = 10, - ): - - # self.world = world ## does world appear elsewhere - - player_dict = self._create_agents() - self.agents = player_dict # or transform however needed # world.policy_agents - self.n = len(self.agents) - - self.discrete_action_space = False - self.discrete_action_input = False - self.force_discrete_action = False - - self.shared_reward = False ## right? - self.time = 0 - - self.points_length = 10 - self.action_length = 10 - self.action_subspace = 3 - - self.action_space = [] - - - # (our inputs) - self.prev_energy = np.zeros(10) - self.one_day = self._find_one_day(one_day) - self.energy_in_state = energy_in_state - self.yesterday_in_state = yesterday_in_state - self.reward_function = reward_function - self.fourier_basis_size = fourier_basis_size - - self.day = 0 - self.days_of_week = [0, 1, 2, 3, 4] - self.day_of_week_flag = day_of_week - self.day_of_week = self.days_of_week[self.day % 5] - - #Create Observation Space (aka State Space) - self.observation_space = [] - - if pricing_type=="TOU": - self.pricing_type = "time_of_use" - elif pricing_type == "RTP": - self.pricing_type = "real_time_pricing" - else: - print("Wrong pricing type") - raise ValueError - - self.prices = self._get_prices() - - ## back to filling in their skeleton - - for agent in self.agents: - # action seq isn't continuous, so skipping Multidiscrete action - self.action_space.append(self._create_action_space()) - - obs_dim = 10 + 10 * self.energy_in_state + 10 * self.yesterday_in_state - self.observation_space.append(self._create_observation_space()) - - + reward_function="log_cost_regularized", + fourier_basis_size=10, + ): + + # self.world = world ## does world appear elsewhere + + self.number_of_participants = number_of_participants + self.response_type_string = response_type_string + self.action_space_string = action_space_string + + player_dict = self._create_agents() + self.agents = player_dict # or transform however needed # world.policy_agents + self.n = len(self.agents) + + self.discrete_action_space = False + self.discrete_action_input = False + self.force_discrete_action = False + + self.shared_reward = False ## right? 
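+        # step() returns one reward per agent (no summing across agents),
+        # so shared_reward stays False here.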
+ self.time = 0 + + self.curr_iter = 0 + + self.points_length = 10 + self.action_length = 10 + self.action_subspace = 3 + + self.action_space = [] + + # (our inputs) + self.prev_energy = np.zeros(10) + self.one_day = self._find_one_day(one_day) + self.energy_in_state = energy_in_state + self.yesterday_in_state = yesterday_in_state + self.reward_function = reward_function + self.fourier_basis_size = fourier_basis_size + + self.day = 0 + self.days_of_week = [0, 1, 2, 3, 4] + self.day_of_week_flag = day_of_week + self.day_of_week = self.days_of_week[self.day % 5] + + # Create Observation Space (aka State Space) + self.observation_space = [] + + if pricing_type == "TOU": + self.pricing_type = "time_of_use" + elif pricing_type == "RTP": + self.pricing_type = "real_time_pricing" + else: + print("Wrong pricing type") + raise ValueError + + self.prices = self._get_prices() + + ## back to filling in their skeleton + + for agent in self.agents: + # action seq isn't continuous, so skipping Multidiscrete action + self.action_space.append(self._create_action_space()) + + obs_dim = 10 + 10 * self.energy_in_state + 10 * self.yesterday_in_state + self.observation_space.append(self._create_observation_space()) + def _find_one_day(self, one_day: int): - """ + """ Purpose: Helper function to find one_day to train on (if applicable) Args: @@ -105,16 +108,15 @@ def _find_one_day(self, one_day: int): one_day if one_day in range [1,365] random_number(1,365) if one_day = -1 """ - - print("one_day") - print(one_day) - # if(one_day != -1): - # return np.random.randint(0, high=365) - - # else: - return one_day + print("one_day") + print(one_day) + # if(one_day != -1): + # return np.random.randint(0, high=365) + + # else: + return one_day def _create_action_space(self): """ @@ -130,9 +132,11 @@ def _create_action_space(self): We pose this option to test whether simplifying the action-space helps the agent. """ - #Making a symmetric, continuous space to help learning for continuous control (suggested in StableBaselines doc.) + # Making a symmetric, continuous space to help learning for continuous control (suggested in StableBaselines doc.) if self.action_space_string == "continuous": - return spaces.Box(low=-1, high=1, shape=(self.action_length,), dtype=np.float32) + return spaces.Box( + low=-1, high=1, shape=(self.action_length,), dtype=np.float32 + ) elif self.action_space_string == "multidiscrete": discrete_space = [self.action_subspace] * self.action_length @@ -140,10 +144,12 @@ def _create_action_space(self): elif self.action_space_string == "fourier": return spaces.Box( - low=-2, high=2, shape=(2*self.fourier_basis_size - 1,), dtype=np.float32 + low=-2, + high=2, + shape=(2 * self.fourier_basis_size - 1,), + dtype=np.float32, ) - def _create_agents(self): """ Purpose: Create the participants of the social game. We create a game with n players, where n = self.number_of_participants @@ -158,51 +164,87 @@ def _create_agents(self): player_dict = {} - # Sample Energy from average energy in the office (pre-treatment) from the last experiment + # Sample Energy from average energy in the office (pre-treatment) from the last experiment # Reference: Lucas Spangher, et al. Engineering vs. ambient typevisualizations: Quantifying effects of different data visualizations on energy consumption. 
2019 - - sample_energy = np.array([ 0.28, 11.9, 16.34, 16.8, 17.43, 16.15, 16.23, 15.88, 15.09, 35.6, - 123.5, 148.7, 158.49, 149.13, 159.32, 157.62, 158.8, 156.49, 147.04, 70.76, - 42.87, 23.13, 22.52, 16.8 ]) - #only grab working hours (8am - 5pm) + sample_energy = np.array( + [ + 0.28, + 11.9, + 16.34, + 16.8, + 17.43, + 16.15, + 16.23, + 15.88, + 15.09, + 35.6, + 123.5, + 148.7, + 158.49, + 149.13, + 159.32, + 157.62, + 158.8, + 156.49, + 147.04, + 70.76, + 42.87, + 23.13, + 22.52, + 16.8, + ] + ) + + # only grab working hours (8am - 5pm) working_hour_energy = sample_energy[8:18] - my_baseline_energy = pd.DataFrame(data = {"net_energy_use" : working_hour_energy}) - + my_baseline_energy = pd.DataFrame(data={"net_energy_use": working_hour_energy}) for i in range(self.number_of_participants): - player = DeterministicFunctionPerson(my_baseline_energy, points_multiplier = 10, response= self.response_type_string) #, ) - player_dict['player_{}'.format(i)] = player + player = DeterministicFunctionPerson( + my_baseline_energy, + points_multiplier=10, + response=self.response_type_string, + ) # , ) + player_dict["player_{}".format(i)] = player return player_dict -def _create_observation_space(self): - """ - Purpose: Returns the observation space. - If the state space includes yesterday, then it is +10 dim for yesterday's price signal - If the state space includes energy_in_state, then it is +10 dim for yesterday's energy + def _create_observation_space(self): + """ + Purpose: Returns the observation space. + If the state space includes yesterday, then it is +10 dim for yesterday's price signal + If the state space includes energy_in_state, then it is +10 dim for yesterday's energy - Args: - None + Args: + None - Returns: - Action Space for environment based on action_space_str - """ + Returns: + Action Space for environment based on action_space_str + """ - #TODO: Normalize obs_space ! - if(self.yesterday_in_state): - if(self.energy_in_state): - return spaces.Box(low=-np.inf, high=np.inf, shape=(30,), dtype=np.float32) - else: - return spaces.Box(low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32) + # TODO: Normalize obs_space ! 
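# Editor's note (illustrative sketch, not part of the patch): the branches below
# encode obs_dim = 10 + 10*yesterday_in_state + 10*energy_in_state -- the base
# 10-hour price signal, plus 10 dims for yesterday's prices and 10 for yesterday's
# energy when the corresponding flag is set. A compact equivalent, assuming
# `spaces` and `np` are the gym.spaces / numpy imports used above:
def _observation_space_sketch(yesterday_in_state, energy_in_state):
    obs_dim = 10 + 10 * int(yesterday_in_state) + 10 * int(energy_in_state)
    return spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)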
+ if self.yesterday_in_state: + if self.energy_in_state: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(30,), dtype=np.float32 + ) + else: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32 + ) - else: - if self.energy_in_state: - return spaces.Box(low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32) else: - return spaces.Box(low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32) + if self.energy_in_state: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32 + ) + else: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32 + ) -def _get_prices(self): + def _get_prices(self): """ Purpose: Get grid price signals for the entire year (using past data from a building in Los Angeles as reference) @@ -213,10 +255,7 @@ def _get_prices(self): """ all_prices = [] - print("--" * 10) - print(self.one_day) - print("--" * 10) - + type_of_DR = self.pricing_type if self.one_day != -1: @@ -224,82 +263,85 @@ def _get_prices(self): # Tweak One_Day Price Signal HERE price = price_signal(self.one_day, type_of_DR=type_of_DR) price = np.array(price[8:18]) - if np.mean(price)==price[2]: - price[3:6]+=.3 + if np.mean(price) == price[2]: + price[3:6] += 0.3 price = np.maximum(0.01 * np.ones_like(price), price) for i in range(365): all_prices.append(price) else: day = 0 - for i in range(365): + for i in range(365): price = price_signal(day + 1, type_of_DR=type_of_DR) price = np.array(price[8:18]) # put a floor on the prices so we don't have negative prices - if np.mean(price)==price[2]: - price[3:6]+=.3 + if np.mean(price) == price[2]: + price[3:6] += 0.3 price = np.maximum(0.01 * np.ones_like(price), price) all_prices.append(price) day += 1 return np.array(all_prices) -def _simulate_human(self, action, agent): + def _simulate_human(self, action, agent): """ - Purpose: Gets energy consumption from players given action from agent + Purpose: Gets energy consumption from players given action from agent - Args: - Action: 10-dim vector corresponding to action for each hour 8AM - 5PM - - Returns: - Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") - """ + Args: + Action: 10-dim vector corresponding to action for each hour 8AM - 5PM + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ - #Get players response to agent's actions + # Get players response to agent's actions player = agent - if (self.day_of_week_flag): - player_energy = player.get_response(action, day_of_week = self.day_of_week) - else: - player_energy = player.get_response(action, day_of_week = None) + if self.day_of_week_flag: + player_energy = player.get_response(action, day_of_week=self.day_of_week) + else: + player_energy = player.get_response(action, day_of_week=None) + + # Calculate energy consumption by player and in total (over the office) - #Calculate energy consumption by player and in total (over the office) - return player_energy + def step(self, action_n): + obs_n = [] + reward_n = [] + done_n = [] + info_n = {"n": []} -def step(self, action_n): - obs_n = [] - reward_n = [] - done_n = [] - info_n = {"n": []} - - prev_price = self.prices[(self.day)] - self.day = (self.day + 1) % 365 - self.curr_iter += 1 - - if self.curr_iter > 0: - done = True - self.curr_iter = 0 - else: - done = False + prev_price = self.prices[(self.day)] + self.day = (self.day + 1) % 365 + self.curr_iter += 1 - for i, 
agent in enumerate(self.agents): - agent.prev_energy = self._simulate_human(action_n[i], agent) - obs_n.append(self._get_observation_per_agent(agent)) - reward_n.append(self._get_reward_per_agent(agent)) - done_n.append(done) - info_n["n"].append({}) ## this was left as a formality in the stableBaselines, too + if self.curr_iter > 0: + done = True + self.curr_iter = 0 + else: + done = False + + for i, agent in enumerate(self.agents.values()): + agent.prev_energy = self._simulate_human(action_n[i], agent) + obs_n.append(self._get_observation_per_agent(agent)) + reward_n.append(self._get_reward_per_agent(agent)) + done_n.append(done) + info_n["n"].append( + {} + ) ## this was left as a formality in the stableBaselines, too - return obs_n, reward_n, done_n, info_n + return obs_n, reward_n, done_n, info_n -def _get_observation_per_agent(self, agent): - prev_price = self.prices[ (self.day - 1) % 365] + def _get_observation_per_agent(self, agent): + prev_price = self.prices[(self.day - 1) % 365] next_observation = self.prices[self.day] - if(self.yesterday_in_state): + if self.yesterday_in_state: if self.energy_in_state: - return np.concatenate((next_observation, np.concatenate((prev_price, agent.prev_energy)))) + return np.concatenate( + (next_observation, np.concatenate((prev_price, agent.prev_energy))) + ) else: return np.concatenate((next_observation, prev_price)) @@ -309,34 +351,36 @@ def _get_observation_per_agent(self, agent): else: return next_observation -def _get_reward_per_agent(self, agent, reward_function = "scaled_cost_distance"): + def _get_reward_per_agent(self, agent, reward_function="scaled_cost_distance"): """ - Purpose: Compute reward given price signal and energy consumption of the office + Purpose: Compute reward given price signal and energy consumption of the office - Args: - agent: DeterministicFunctionPerson object, simulated office twerker - Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage - - Returns: - Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") - """ + Args: + agent: DeterministicFunctionPerson object, simulated office twerker + Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ price = self.prices[self.day] # get the reward from the player's output player_min_demand = agent.get_min_demand() player_max_demand = agent.get_max_demand() player_energy = agent.prev_energy - player_reward = Reward(player_energy, price, player_min_demand, player_max_demand) + player_reward = Reward( + player_energy, price, player_min_demand, player_max_demand + ) if reward_function == "scaled_cost_distance": player_ideal_demands = player_reward.ideal_use_calculation() reward = player_reward.scaled_cost_distance(player_ideal_demands) - elif reward_function == "log_cost_regularized": reward = player_reward.log_cost_regularized() + else: + raise Exception("reward_function not defined") + return reward - return total_reward - def reset(self): # reset world obs_n = [] @@ -344,8 +388,8 @@ def reset(self): obs_n.append(self._get_observation_per_agent(agent)) return obs_n - def render(self, mode='human'): + def render(self, mode="human"): pass def close(self): - pass \ No newline at end of file + pass diff --git a/multiagent/agents.py 
b/multiagent/agents.py index 84be92bd9..f7bfbd1fa 100644 --- a/multiagent/agents.py +++ b/multiagent/agents.py @@ -1,35 +1,34 @@ import pandas as pd import numpy as np -import cvxpy as cvx from sklearn.preprocessing import MinMaxScaler +import random -#### file to make the simulation of people that we can work with +#### file to make the simulation of people that we can work with -class Person(): - """ Person (parent?) class -- will define how the person takes in a points signal and puts out an energy signal +class Person: + """ Person (parent?) class -- will define how the person takes in a points signal and puts out an energy signal baseline_energy = a list or dataframe of values. This is data from SinBerBEST points_multiplier = an int which describes how sensitive each person is to points """ - def __init__(self, baseline_energy_df, points_multiplier = 1): - self.baseline_energy_df = baseline_energy_df - self.baseline_energy = np.array(self.baseline_energy_df["net_energy_use"]) - self.points_multiplier = points_multiplier - - baseline_min = self.baseline_energy.min() - baseline_max = self.baseline_energy.max() - baseline_range = baseline_max - baseline_min - - self.min_demand = np.maximum(0, baseline_min + baseline_range * .05) - self.max_demand = np.maximum(0, baseline_min + baseline_range * .95) - self.silent = True - self.prev_energy = self.baseline_energy # basically a placeholder. + def __init__(self, baseline_energy_df, points_multiplier=1): + self.baseline_energy_df = baseline_energy_df + self.baseline_energy = np.array(self.baseline_energy_df["net_energy_use"]) + self.points_multiplier = points_multiplier + baseline_min = self.baseline_energy.min() + baseline_max = self.baseline_energy.max() + baseline_range = baseline_max - baseline_min - def energy_output_simple_linear(self, points): - """Determines the energy output of the person, based on the formula: + self.min_demand = np.maximum(0, baseline_min + baseline_range * 0.05) + self.max_demand = np.maximum(0, baseline_min + baseline_range * 0.95) + self.silent = True + self.prev_energy = self.baseline_energy # basically a placeholder. + + def energy_output_simple_linear(self, points): + """Determines the energy output of the person, based on the formula: y[n] = -sum_{rolling window of 5} points + baseline_energy + noise @@ -39,194 +38,210 @@ def energy_output_simple_linear(self, points): For now, that's in 1 hour increments """ - points_df = pd.DataFrame(points) - - points_effect = ( - points_df - .rolling( - window = 5, - min_periods = 1) - .mean() - ) + points_df = pd.DataFrame(points) + points_effect = points_df.rolling(window=5, min_periods=1).mean() + time = points_effect.shape[0] + energy_output = [] - time = points_effect.shape[0] - energy_output= [] + for t in range(time): + temp_energy = ( + self.baseline_energy[t] + - points_effect.iloc[t] * self.points_multiplier + + np.random.normal(1) + ) + energy_output.append(temp_energy) - for t in range(time): - temp_energy = self.baseline_energy[t] - points_effect.iloc[t]*self.points_multiplier + \ - np.random.normal(1) - energy_output.append(temp_energy) - - return pd.DataFrame(energy_output) + return pd.DataFrame(energy_output) - def pure_linear_signal(self, points, baseline_day=0): - """ + def pure_linear_signal(self, points, baseline_day=0): + """ A linear person. The more points you give them, the less energy they will use (within some bounds) for each hour. No rolling effects or anything. The simplest signal. 
""" - # hack here to always grab the first day from the baseline_energy - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - - points_effect = np.array(points * self.points_multiplier) - output = output - points_effect + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] - # impose bounds/constraints - output = np.maximum(output, self.min_demand) - output = np.minimum(output, self.max_demand) - return output + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + return output + def get_min_demand(self): + return self.min_demand + # return np.quantile(self.baseline_energy, .05) - def get_min_demand(self): - return self.min_demand - # return np.quantile(self.baseline_energy, .05) + def get_max_demand(self): + return self.max_demand + # return np.quantile(self.baseline_energy, .95) - def get_max_demand(self): - return self.max_demand - # return np.quantile(self.baseline_energy, .95) class FixedDemandPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1): + super().__init__(baseline_energy_df, points_multiplier) - def __init__(self, baseline_energy_df, points_multiplier = 1): - super().__init__(baseline_energy_df, points_multiplier) - - - def demand_from_points(self, points, baseline_day=0): - # hack here to always grab the first day from the baseline_energy - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - total_demand = np.sum(output) + def demand_from_points(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect - points_effect = np.array(points * self.points_multiplier) - output = output - points_effect + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand / np.sum(output)) - # scale to keep total_demand (almost) constant - # almost bc imposing bounds afterwards - output = output * (total_demand/np.sum(output)) + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) - # impose bounds/constraints - output = np.maximum(output, self.min_demand) - output = np.minimum(output, self.max_demand) + return output - return output + def adverserial_linear(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) - def adverserial_linear(self, points, baseline_day=0): - # hack here to always grab the first day from the baseline_energy - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - total_demand = np.sum(output) + points_effect = np.array(points * self.points_multiplier) + output = output + points_effect + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand / np.sum(output)) - points_effect = np.array(points * self.points_multiplier) - output = output + points_effect + # impose bounds/constraints + 
output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) - # scale to keep total_demand (almost) constant - # almost bc imposing bounds afterwards - output = output * (total_demand/np.sum(output)) + return output - # impose bounds/constraints - output = np.maximum(output, self.min_demand) - output = np.minimum(output, self.max_demand) - - return output class DeterministicFunctionPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1, response="t"): + super().__init__(baseline_energy_df, points_multiplier) + self.response = response + + def threshold_response_func(self, points): + points = np.array(points) * self.points_multiplier + threshold = np.mean(points) + return [p if p > threshold else 0 for p in points] + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p ** 2 for p in points] + + return points_effect + + def sin_response_func(self, points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i) * np.pi) * self.points_multiplier for i in points] + points = points + return points + + def linear_response_func(self, points): + return points * self.points_multiplier + + def routine_output_transform(self, points_effect, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output - points_effect + # output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + # output = np.maximum(output, self.min_demand) + # output = np.minimum(output, self.max_demand) + # return output + + if day_of_week != None: + energy_resp = energy_resp * self.day_of_week_multiplier[day_of_week] + + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + scaled_output = scaler.fit_transform(output.reshape(-1, 1)) + + return np.squeeze(scaled_output) + + def threshold_response(self, points, day_of_week=None): + points_effect = self.threshold_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def sin_response(self, points, day_of_week=None): + points_effect = self.sin_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def exp_response(self, points, day_of_week=None): + points_effect = self.exponential_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def threshold_exp_response(self, points, day_of_week=None): + points_effect = self.exponential_response_func(points) + points_effect = self.threshold_response_func(points_effect) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def linear_response(self, points, day_of_week=None): + points_effect = points * self.points_multiplier + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def get_response(self, points, day_of_week=None): + if self.response == "t": + energy_resp = self.threshold_exp_response(points, day_of_week=day_of_week) + elif self.response == "s": + energy_resp = self.sin_response(points, day_of_week=day_of_week) + elif self.response == "l": + energy_resp = self.linear_response(points, 
day_of_week=day_of_week) + elif self.response == "m": + draw = random.randint(Start=0, Stop=2) + if draw == 0: + energy_resp = self.threshold_exp_response( + points, day_of_week=day_of_week + ) + elif draw == 1: + energy_resp = self.sin_response(points, day_of_week=day_of_week) + else: + energy_resp = self.linear_response(points, day_of_week=day_of_week) + energy_resp = self.mixed_response(points, day_of_week=day_of_week) + else: + raise NotImplementedError + + return energy_resp - def __init__(self, baseline_energy_df, points_multiplier = 1, response = 't'): - super().__init__(baseline_energy_df, points_multiplier) - self.response = response - - def threshold_response_func(self, points): - points = np.array(points) * self.points_multiplier - threshold = np.mean(points) - return [p if p>threshold else 0 for p in points] - - def exponential_response_func(self, points): - points = np.array(points) * self.points_multiplier - points_effect = [p**2 for p in points] - - return points_effect - - def sin_response_func(self,points): - points = np.array(points) - # n = np.max(points) - # points = [np.sin((float(i)/float(n))*np.pi) for i in points] - points = [np.sin(float(i)*np.pi)*self.points_multiplier for i in points] - points = points - return points - - def linear_response_func(self,points): - return points * self.points_multiplier - - def routine_output_transform(self, points_effect, baseline_day=0, day_of_week = None): - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - total_demand = np.sum(output) - - # scale to keep total_demand (almost) constant - # almost bc imposing bounds afterwards - output = output - points_effect - # output = output * (total_demand/np.sum(output)) - - # impose bounds/constraints - # output = np.maximum(output, self.min_demand) - # output = np.minimum(output, self.max_demand) - # return output - - - if(day_of_week != None): - energy_resp = energy_resp * self.day_of_week_multiplier[day_of_week] - - scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) - scaled_output = scaler.fit_transform(output.reshape(-1, 1)) - - return np.squeeze(scaled_output) - - - def threshold_response(self, points, day_of_week = None): - points_effect = self.threshold_response_func(points) - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def sin_response(self, points, day_of_week = None): - points_effect = self.sin_response_func(points) - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def exp_response(self, points, day_of_week = None): - points_effect = self.exponential_response_func(points) - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def threshold_exp_response(self, points, day_of_week = None): - points_effect = self.exponential_response_func(points) - points_effect = self.threshold_response_func(points_effect) - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def linear_response(self, points, day_of_week = None): - points_effect = points*self.points_multiplier - output = self.routine_output_transform(points_effect, day_of_week = day_of_week) - return output - - def get_response(self, points, day_of_week=None): - if(self.response == 't'): - energy_resp = self.threshold_exp_response(points, day_of_week = day_of_week) - elif(self.response == 's'): - energy_resp = self.sin_response(points, day_of_week = day_of_week) - 
elif(self.response == 'l'): - energy_resp = self.linear_response(points, day_of_week = day_of_week) - else: - raise NotImplementedError - - return energy_resp class RandomizedFunctionPerson(DeterministicFunctionPerson): - def __init__(self, baseline_energy_df, points_multiplier=1, response='t', low = 0, high = 50, distr = 'U'): - - """ + def __init__( + self, + baseline_energy_df, + points_multiplier=1, + response="t", + low=0, + high=50, + distr="U", + ): + + """ Adds Random Noise to DeterministicFunctionPerson energy output (for D.R. purposes) New Args: @@ -236,93 +251,108 @@ def __init__(self, baseline_energy_df, points_multiplier=1, response='t', low = Note: For design purposes the random noise is updated at the end of each episode """ - #TODO: Multivariate distr?? + # TODO: Multivariate distr?? - super().__init__(baseline_energy_df, points_multiplier=points_multiplier, response=response) - - distr = distr.upper() - assert distr in ['G', 'U'] + super().__init__( + baseline_energy_df, points_multiplier=points_multiplier, response=response + ) - self.response = response - self.low = low - self.high = high if high < self.max_demand else 50 - self.distr = distr + distr = distr.upper() + assert distr in ["G", "U"] - self.noise = [] - self.update_noise() + self.response = response + self.low = low + self.high = high if high < self.max_demand else 50 + self.distr = distr - def update_noise(self): - if(self.distr == 'G'): - #TODO: Update how to sample from Gausian - self.noise = np.random.normal(loc = (self.low + self.high) / 2, scale = 10, size = 10) - - elif(self.distr == 'U'): - self.noise = np.random.uniform(low = self.low, high=self.high, size = 10) + self.noise = [] + self.update_noise() + def update_noise(self): + if self.distr == "G": + # TODO: Update how to sample from Gausian + self.noise = np.random.normal( + loc=(self.low + self.high) / 2, scale=10, size=10 + ) - def exponential_response_func(self, points): - points = np.array(points) * self.points_multiplier - points_effect = [p**2 for p in points] + elif self.distr == "U": + self.noise = np.random.uniform(low=self.low, high=self.high, size=10) - return points_effect + self.noise + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p ** 2 for p in points] - def sin_response_func(self,points): - points = np.array(points) - # n = np.max(points) - # points = [np.sin((float(i)/float(n))*np.pi) for i in points] - points = [np.sin(float(i)*np.pi)*self.points_multiplier for i in points] - points = points - return points + self.noise - - def linear_response_func(self,points): - return points * self.points_multiplier + self.noise + return points_effect + self.noise + def sin_response_func(self, points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i) * np.pi) * self.points_multiplier for i in points] + points = points + return points + self.noise -# utkarsha's person + def linear_response_func(self, points): + return points * self.points_multiplier + self.noise -class CurtailandShiftPerson(Person): - def __init__(self, baseline_energy_df, points_multiplier = 1): - super().__init__(baseline_energy_df, points_multiplier) - self.shiftableLoadFraction = 0.1 - self.shiftByHours = 3 - self.curtailableLoadFraction = 0.1 - self.maxCurtailHours = 3 #Person willing to curtail for no more than these hours - - def shiftedLoad(self, points, baseline_day=0, day_of_week=None): - output = 
np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - points = np.array(points) * self.points_multiplier - shiftableLoad = self.shiftableLoadFraction*output - shiftByHours = self.shiftByHours - - # 10 hour day. Rearrange the sum of shiftableLoad into these hours by treating points as the 'price' at that hour - # Load can be shifted by a max of shiftByHours (default = 3 hours) - # For each hour, calculate the optimal hour to shift load to within +- 3 hours - shiftedLoad = np.zeros(10) - for hour in range(10): - candidatePrices = points[max(hour-shiftByHours,0): min(hour+shiftByHours,9)+1] - shiftToHour = max(hour-shiftByHours,0) + np.argmin(candidatePrices) - shiftedLoad[shiftToHour] += shiftableLoad[hour] - return shiftedLoad - - def curtailedLoad(self, points, baseline_day=0, day_of_week=None): - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - points = np.array(points) * self.points_multiplier - curtailableLoad = self.curtailableLoadFraction*output - maxPriceHours = np.argsort(points)[0:self.maxCurtailHours] - for hour in maxPriceHours: - curtailableLoad[hour] = 0 - return curtailableLoad - - def get_response(self, points, day_of_week=None): - baseline_day = 0 - output = np.array(self.baseline_energy)[baseline_day*24:baseline_day*24+10] - energy_resp = output*(1 - self.curtailableLoadFraction - self.shiftableLoadFraction) + self.curtailedLoad(points) + self.shiftedLoad(points) - - - self.min_demand = np.maximum(0, min(energy_resp)) - self.max_demand = np.maximum(0, max(energy_resp)) - return energy_resp +# utkarsha's person +class CurtailandShiftPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1): + super().__init__(baseline_energy_df, points_multiplier) + self.shiftableLoadFraction = 0.1 + self.shiftByHours = 3 + self.curtailableLoadFraction = 0.1 + self.maxCurtailHours = ( + 3 # Person willing to curtail for no more than these hours + ) + + def shiftedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + points = np.array(points) * self.points_multiplier + shiftableLoad = self.shiftableLoadFraction * output + shiftByHours = self.shiftByHours + + # 10 hour day. 
Rearrange the sum of shiftableLoad into these hours by treating points as the 'price' at that hour + # Load can be shifted by a max of shiftByHours (default = 3 hours) + # For each hour, calculate the optimal hour to shift load to within +- 3 hours + shiftedLoad = np.zeros(10) + for hour in range(10): + candidatePrices = points[ + max(hour - shiftByHours, 0) : min(hour + shiftByHours, 9) + 1 + ] + shiftToHour = max(hour - shiftByHours, 0) + np.argmin(candidatePrices) + shiftedLoad[shiftToHour] += shiftableLoad[hour] + return shiftedLoad + + def curtailedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + points = np.array(points) * self.points_multiplier + curtailableLoad = self.curtailableLoadFraction * output + maxPriceHours = np.argsort(points)[0 : self.maxCurtailHours] + for hour in maxPriceHours: + curtailableLoad[hour] = 0 + return curtailableLoad + + def get_response(self, points, day_of_week=None): + baseline_day = 0 + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + energy_resp = ( + output * (1 - self.curtailableLoadFraction - self.shiftableLoadFraction) + + self.curtailedLoad(points) + + self.shiftedLoad(points) + ) + + self.min_demand = np.maximum(0, min(energy_resp)) + self.max_demand = np.maximum(0, max(energy_resp)) + + return energy_resp diff --git a/multiagent/reward.py b/multiagent/reward.py index d0dd724d6..0b09bea35 100644 --- a/multiagent/reward.py +++ b/multiagent/reward.py @@ -1,15 +1,16 @@ import cvxpy as cvx + # import osqp import numpy as np from sklearn.preprocessing import MinMaxScaler -#### file to calculate the rewards. Meant to be modular: +#### file to calculate the rewards. Meant to be modular: #### class Rewards should have several different functions by Dec 2019 -class Reward(): - def __init__(self, energy_use, prices, min_demand, max_demand): - """ +class Reward: + def __init__(self, energy_use, prices, min_demand, max_demand): + """ Args: energy_use: list returned by Person class signifying energy use prices: list returned by grid signifying cost throughout day @@ -17,74 +18,76 @@ def __init__(self, energy_use, prices, min_demand, max_demand): max_demand: value computed by Person class signifying maximum energy use long term """ - self.energy_use = np.array(energy_use) - self.prices = np.array(prices) - self._num_timesteps = energy_use.shape[0] - self.min_demand = np.min(energy_use) # min_demand - self.max_demand = np.max(energy_use) # max_demand - self.baseline_max_demand = 159.32 + self.energy_use = np.array(energy_use) + self.prices = np.array(prices) + self._num_timesteps = energy_use.shape[0] + self.min_demand = np.min(energy_use) # min_demand + self.max_demand = np.max(energy_use) # max_demand + self.baseline_max_demand = 159.32 - assert round(self.max_demand) == round(max_demand), "The max demand that the player is using and the optimization is using is not the same" - assert round(self.min_demand) == round(min_demand), "The min demand that the player is using and the optimization is using is not the same" + assert round(self.max_demand) == round( + max_demand + ), "The max demand that the player is using and the optimization is using is not the same" + assert round(self.min_demand) == round( + min_demand + ), "The min demand that the player is using and the optimization is using is not the same" - self.total_demand = np.sum(energy_use) + self.total_demand = np.sum(energy_use) - def 
ideal_use_calculation(self): - """ + def ideal_use_calculation(self): + """ Computes an optimization of demand according to price returns: np.array of ideal energy demands given a price signal """ - demands = cvx.Variable(self._num_timesteps) - min_demand = cvx.Parameter() - max_demand = cvx.Parameter() - total_demand = cvx.Parameter() - prices = cvx.Parameter(self._num_timesteps) - - min_demand = self.min_demand - max_demand = self.max_demand - total_demand = self.total_demand - - while (max_demand * 10 < total_demand): - print("multiplying demand to make optimization work") - print("max_demand: " + str(max_demand)) - print("total_demand: " + str(total_demand)) - print("energy_use") - print(self.energy_use) - max_demand *= 1.1 - - prices = self.prices - constraints = [cvx.sum(demands, axis=0, keepdims=True) == total_demand] - # constraints = [np.ones(self._num_timesteps).T * demands == total_demand] - for i in range(self._num_timesteps): - constraints += [demands[i] <= max_demand] - constraints += [min_demand <= demands[i]] - # if i != 0: - # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] - - - objective = cvx.Minimize(demands.T * prices) - problem = cvx.Problem(objective, constraints) - - problem.solve(solver = cvx.ECOS, verbose=False) - return np.array(demands.value) - - - def log_cost(self): - """ + demands = cvx.Variable(self._num_timesteps) + min_demand = cvx.Parameter() + max_demand = cvx.Parameter() + total_demand = cvx.Parameter() + prices = cvx.Parameter(self._num_timesteps) + + min_demand = self.min_demand + max_demand = self.max_demand + total_demand = self.total_demand + + while max_demand * 10 < total_demand: + print("multiplying demand to make optimization work") + print("max_demand: " + str(max_demand)) + print("total_demand: " + str(total_demand)) + print("energy_use") + print(self.energy_use) + max_demand *= 1.1 + + prices = self.prices + constraints = [cvx.sum(demands, axis=0, keepdims=True) == total_demand] + # constraints = [np.ones(self._num_timesteps).T * demands == total_demand] + for i in range(self._num_timesteps): + constraints += [demands[i] <= max_demand] + constraints += [min_demand <= demands[i]] + # if i != 0: + # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] + + objective = cvx.Minimize(demands.T * prices) + problem = cvx.Problem(objective, constraints) + + problem.solve(solver=cvx.ECOS, verbose=False) + return np.array(demands.value) + + def log_cost(self): + """ Scales energy_use to be between min and max energy demands (this is repeated in agent.routine_output_trasform), and then returns the simple total cost. """ - scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) - scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) - return -np.log(np.dot(scaled_energy, self.prices)) + return -np.log(np.dot(scaled_energy, self.prices)) - def log_cost_regularized(self): - """ + def log_cost_regularized(self): + """ Scales energy_use to be between min and max energy demands (this is repeated in agent.routine_output_trasform), and then returns the simple total cost. @@ -92,26 +95,21 @@ def log_cost_regularized(self): """ - scaler = MinMaxScaler(feature_range = (self.min_demand, self.max_demand)) - ## ? 
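# Editor's note (illustrative sketch, not part of the patch): the rewrite of
# log_cost_regularized just below returns the negative log of the day's bill
# (after rescaling energy into [min_demand, max_demand]) minus a flat penalty of
# 10 whenever total consumption falls under half the baseline peak of 159.32 kWh
# times the 10-hour day, so an agent cannot score well by simply suppressing all
# demand. Standalone form of that expression, assuming numpy as np:
def log_cost_regularized_sketch(scaled_energy, prices, energy_use,
                                baseline_max_demand=159.32):
    bill = np.dot(scaled_energy, prices)
    too_little_energy = np.sum(energy_use) < 10 * (0.5 * baseline_max_demand)
    return -np.log(bill) - 10 * too_little_energy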
- scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) - + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + ## ? + scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + return -np.log(np.dot(scaled_energy, self.prices)) - 10 * ( + np.sum(self.energy_use) < (10 * (0.5 * self.baseline_max_demand)) + ) # - + # 10 * ()) + # sigmoid between 10 and 20 so there's a smooth transition + # - lambd * (difference b/w energy(t)) - put a bound on + # play with the lipschitz constant + # [10, 20, 10, 10, 10, ] - return (-np.log(np.dot(scaled_energy, self.prices)) - - 10 * (np.sum(self.energy_use) < (10 * (.5 * self.baseline_max_demand)))) # - - #10 * ()) - # sigmoid between 10 and 20 so there's a smooth transition - # - lambd * (difference b/w energy(t)) - put a bound on - # play with the lipschitz constant - # [10, 20, 10, 10, 10, ] - - - - - - def neg_distance_from_ideal(self, demands): - """ + def neg_distance_from_ideal(self, demands): + """ args: demands: np.array() of demands from ideal_use_calculation() @@ -119,84 +117,87 @@ def neg_distance_from_ideal(self, demands): a numerical distance metric, negated """ - return -((demands - self.energy_use)**2).sum() + return -((demands - self.energy_use) ** 2).sum() - def cost_distance(self, ideal_demands): - """ + def cost_distance(self, ideal_demands): + """ args: demands: np.array() of demands from ideal_use_calculation() returns: a cost-based distance metric, negated """ - current_cost = np.dot(self.prices, self.energy_use) - ideal_cost = np.dot(self.prices, ideal_demands) + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) - cost_difference = ideal_cost - current_cost - - return cost_difference + cost_difference = ideal_cost - current_cost - def log_cost_distance(self, ideal_demands): - """ + return cost_difference + + def log_cost_distance(self, ideal_demands): + """ args: demands: np.array() of demands from ideal_use_calculation() returns: the log of the cost distance """ - current_cost = np.dot(self.prices, self.energy_use) - ideal_cost = np.dot(self.prices, ideal_demands) - - cost_difference = ideal_cost - current_cost - - #TODO ENSURE THAT COST DIFFERENCE IS < 0 - if cost_difference < 0: - return -np.log(-cost_difference) - else: - print("WEIRD REWARD ALERT. IDEAL COST >= CURRENT COST. returning reward of 10") - return 10 - - def scaled_cost_distance(self, ideal_demands): - """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # TODO ENSURE THAT COST DIFFERENCE IS < 0 + if cost_difference < 0: + return -np.log(-cost_difference) + else: + print( + "WEIRD REWARD ALERT. IDEAL COST >= CURRENT COST. 
returning reward of 10" + ) + return 10 + + def scaled_cost_distance(self, ideal_demands): + """ args: demands: np.array() of demands from ideal_use_calculation() returns: a cost-based distance metric normalized by total ideal cost """ - - current_cost = np.dot(self.prices, self.energy_use) - ideal_cost = np.dot(self.prices, ideal_demands) - - cost_difference = ideal_cost - current_cost - - # print("--" * 10) - # print("ideal cost") - # print(ideal_cost) - # print("--" * 10) - # print("prices") - # print(self.prices) - # print("--" * 10) - # print("current_cost") - # print(current_cost) - - if cost_difference > 0 or ideal_cost < 0: - print("--" * 10) - print("Problem with reward") - # print("min_demand: " + str(self.min_demand)) - # print("max_demand: " + str(self.max_demand)) - # print("--" * 10) - print("prices") - print(self.prices) - # print("current_cost") - # print(current_cost) - # print("--" * 10) - # print("ideal_cost") - # print(ideal_cost) - # print("ideal_demands") - # print(ideal_demands) - # print("energy demand") - # print(self.energy_use) - print("taking the neg abs value so that it stays the same sign.") - - return -np.abs(cost_difference/ideal_cost) + + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # print("--" * 10) + # print("ideal cost") + # print(ideal_cost) + # print("--" * 10) + # print("prices") + # print(self.prices) + # print("--" * 10) + # print("current_cost") + # print(current_cost) + + if cost_difference > 0 or ideal_cost < 0: + print("--" * 10) + print("Problem with reward") + # print("min_demand: " + str(self.min_demand)) + # print("max_demand: " + str(self.max_demand)) + # print("--" * 10) + print("prices") + print(self.prices) + # print("current_cost") + # print(current_cost) + # print("--" * 10) + # print("ideal_cost") + # print(ideal_cost) + # print("ideal_demands") + # print(ideal_demands) + # print("energy demand") + # print(self.energy_use) + print("taking the neg abs value so that it stays the same sign.") + + return -np.abs(cost_difference / ideal_cost) + diff --git a/multiagent/utils.py b/multiagent/utils.py index 19637d732..1cb1d8877 100644 --- a/multiagent/utils.py +++ b/multiagent/utils.py @@ -4,13 +4,14 @@ from scipy.optimize import minimize import os import matplotlib.pyplot as plt -from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import MinMaxScaler +import os -#data_path = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "building_data.csv") +# data_path = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "building_data.csv") # csv_path = os.path.dirname(os.path.realpath(__file__)) + "/building_data.csv" -def price_signal(day = 45, type_of_DR = "real_time_pricing"): +def price_signal(day=45, type_of_DR="real_time_pricing"): """ Utkarsha's work on price signal from a building with demand and solar @@ -21,7 +22,11 @@ def price_signal(day = 45, type_of_DR = "real_time_pricing"): """ csv_path = "building_data.csv" csv_path_2 = "../gym-socialgame/gym_socialgame/envs/building_data.csv" - csv_path_3 = "/global/home/users/lucas_spangher/transactive_control/gym-socialgame/gym_socialgame/envs/building_data.csv" + current_dir = os.path.dirname(os.path.realpath(__file__)) + csv_path_3 = os.path.join( + current_dir, + "../../transactive_control/gym-socialgame/gym_socialgame/envs/building_data.csv", + ) try: df = pd.read_csv(csv_path) except: @@ -30,35 +35,39 @@ def price_signal(day = 45, type_of_DR = 
"real_time_pricing"): except: df = pd.read_csv(csv_path_3) - pv = 0.001*np.array(df['PV (W)'].tolist()) - price = np.array(df['Price( $ per kWh)'].tolist()) - demand = np.nan_to_num(np.array(df['Office_Elizabeth (kWh)'].tolist())) - demand_charge = 10/30 # 10$/kW per month + pv = 0.001 * np.array(df["PV (W)"].tolist()) + price = np.array(df["Price( $ per kWh)"].tolist()) + demand = np.nan_to_num(np.array(df["Office_Elizabeth (kWh)"].tolist())) + demand_charge = 10 / 30 # 10$/kW per month - pvsize = 0 #Assumption - netdemand = demand - pvsize*pv + pvsize = 0 # Assumption + netdemand = demand - pvsize * pv # Data starts at 5 am on Jan 1 - netdemand_24 = netdemand[24*day-5:24*day+19] - price_24 = price[24*day-5:24*day+19] - pv_24 = pv[24*day-5:24*day+19] - demand_24 = demand[24*day-5:24*day+19] + netdemand_24 = netdemand[24 * day - 5 : 24 * day + 19] + price_24 = price[24 * day - 5 : 24 * day + 19] + pv_24 = pv[24 * day - 5 : 24 * day + 19] + demand_24 = demand[24 * day - 5 : 24 * day + 19] # Calculate optimal load scheduling. 90% of load is fixed, 10% is controllable. def optimise_24h(netdemand_24, price_24): - currentcost = netdemand_24@price_24 + currentcost = netdemand_24 @ price_24 + + fixed_load = 0.9 * netdemand_24 + controllable_load = sum(0.1 * netdemand_24) - fixed_load = 0.9*netdemand_24 - controllable_load = sum(0.1*netdemand_24) - def objective(x): load = fixed_load + x - cost = np.multiply(price_24,load) + cost = np.multiply(price_24, load) # Negative demand means zero cost, not negative cost # Adding L1 regularisation to penalise shifting of occupant demand lambd = 0.005 # Demand charge: attached to peak demand - return sum(np.maximum(cost,0)) + demand_charge*max(load) + lambd*sum(abs(x-0.1*netdemand_24)) + return ( + sum(np.maximum(cost, 0)) + + demand_charge * max(load) + + lambd * sum(abs(x - 0.1 * netdemand_24)) + ) def constraint_sumofx(x): return sum(x) - controllable_load @@ -68,48 +77,40 @@ def constraint_x_positive(x): x0 = np.zeros(24) cons = [ - {'type':'eq', 'fun': constraint_sumofx}, - {'type':'ineq', 'fun':constraint_x_positive} + {"type": "eq", "fun": constraint_sumofx}, + {"type": "ineq", "fun": constraint_x_positive}, ] sol = minimize(objective, x0, constraints=cons) return sol - if type_of_DR == "real_time_pricing": - sol = optimise_24h(netdemand_24,price_24) - x = sol['x'] - diff = x - 0.1*netdemand_24 + if type_of_DR == "real_time_pricing": + sol = optimise_24h(netdemand_24, price_24) + x = sol["x"] + diff = x - 0.1 * netdemand_24 return -diff - min(-diff) elif type_of_DR == "time_of_use": - if (np.mean(price_24[8:18]) == price_24[9]): - price_24[13:16]+=3 + if np.mean(price_24[8:18]) == price_24[9]: + price_24[13:16] += 3 return price_24 else: return "error!!!" 
def fourier_points_from_action(action, points_length, fourier_basis_size): - assert fourier_basis_size == (action.size + 1) // 2, "Incorrect fourier basis size for actions" - root_points = (action[0]/2)*np.ones(points_length) + assert ( + fourier_basis_size == (action.size + 1) // 2 + ), "Incorrect fourier basis size for actions" + root_points = (action[0] / 2) * np.ones(points_length) inp = np.linspace(0, 1, points_length) for k in range(1, fourier_basis_size): - ak, bk = action[2*k - 1], action[2*k] - root_points += ak * np.sin(2*np.pi*k*inp) + bk * np.cos(2*np.pi*k*inp) + ak, bk = action[2 * k - 1], action[2 * k] + root_points += ak * np.sin(2 * np.pi * k * inp) + bk * np.cos( + 2 * np.pi * k * inp + ) - points = root_points**2 - #TODO: More elegant solution than clipping + points = root_points ** 2 + # TODO: More elegant solution than clipping points = np.clip(points, 0, 10) return points - - - - - - - - - - - - From f58136f10f81dcd0e41c6181403060c34640db39 Mon Sep 17 00:00:00 2001 From: Manan Khattar Date: Tue, 15 Dec 2020 14:19:04 -0800 Subject: [PATCH 5/6] pre 12/15 work --- multiagent/agents.py | 11 ----------- multiagent/reward.py | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/multiagent/agents.py b/multiagent/agents.py index f7bfbd1fa..350ae904b 100644 --- a/multiagent/agents.py +++ b/multiagent/agents.py @@ -1,7 +1,6 @@ import pandas as pd import numpy as np from sklearn.preprocessing import MinMaxScaler -import random #### file to make the simulation of people that we can work with @@ -214,19 +213,9 @@ def get_response(self, points, day_of_week=None): elif self.response == "l": energy_resp = self.linear_response(points, day_of_week=day_of_week) elif self.response == "m": - draw = random.randint(Start=0, Stop=2) - if draw == 0: - energy_resp = self.threshold_exp_response( - points, day_of_week=day_of_week - ) - elif draw == 1: - energy_resp = self.sin_response(points, day_of_week=day_of_week) - else: - energy_resp = self.linear_response(points, day_of_week=day_of_week) energy_resp = self.mixed_response(points, day_of_week=day_of_week) else: raise NotImplementedError - return energy_resp diff --git a/multiagent/reward.py b/multiagent/reward.py index 0b09bea35..f55b80527 100644 --- a/multiagent/reward.py +++ b/multiagent/reward.py @@ -65,7 +65,7 @@ def ideal_use_calculation(self): for i in range(self._num_timesteps): constraints += [demands[i] <= max_demand] constraints += [min_demand <= demands[i]] - # if i != 0: + # if i != 0:y # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] objective = cvx.Minimize(demands.T * prices) From 17f2073c791837d7248980149167e79783772d55 Mon Sep 17 00:00:00 2001 From: Manan Khattar Date: Thu, 17 Dec 2020 02:24:17 -0800 Subject: [PATCH 6/6] 12/16 --- multiagent/SG_environment.py | 21 ++++++++++++++++++--- multiagent/agents.py | 9 +++++++++ multiagent/reward.py | 4 ++-- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py index de5e2e7dd..5d04e5886 100644 --- a/multiagent/SG_environment.py +++ b/multiagent/SG_environment.py @@ -307,6 +307,19 @@ def _simulate_human(self, action, agent): return player_energy def step(self, action_n): + for i, action in enumerate(action_n): + action = np.asarray(action) + if self.action_space_string == "continuous": + action = np.clip(action, 0, 10) + action_n[i] = action + else: + raise Exception("Wrong action_space_string") + + print("--" * 10) + print("--" * 10) + print("clipped action") + print(action_n) + obs_n = [] 
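# Editor's note (illustrative sketch, not part of the patch): the clipping added
# above maps whatever the learner emits into the [0, 10] "points" range before it
# reaches the simulated players; since the continuous space is Box(low=-1, high=1),
# the negative half of that range collapses to 0 after the clip. Standalone
# equivalent, assuming numpy as np:
raw_action = np.array([-0.4, 0.2, 1.3])      # e.g. sampled from the Box(-1, 1) space
clipped_action = np.clip(raw_action, 0, 10)  # -> array([0. , 0.2, 1.3])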
reward_n = [] done_n = [] @@ -316,7 +329,7 @@ def step(self, action_n): self.day = (self.day + 1) % 365 self.curr_iter += 1 - if self.curr_iter > 0: + if self.curr_iter > 0: ### could change done = True self.curr_iter = 0 else: @@ -325,7 +338,9 @@ def step(self, action_n): for i, agent in enumerate(self.agents.values()): agent.prev_energy = self._simulate_human(action_n[i], agent) obs_n.append(self._get_observation_per_agent(agent)) - reward_n.append(self._get_reward_per_agent(agent)) + reward_n.append( + self._get_reward_per_agent(agent, reward_function=self.reward_function) + ) done_n.append(done) info_n["n"].append( {} @@ -384,7 +399,7 @@ def _get_reward_per_agent(self, agent, reward_function="scaled_cost_distance"): def reset(self): # reset world obs_n = [] - for agent in self.agents: + for agent in self.agents.values(): obs_n.append(self._get_observation_per_agent(agent)) return obs_n diff --git a/multiagent/agents.py b/multiagent/agents.py index 350ae904b..173099157 100644 --- a/multiagent/agents.py +++ b/multiagent/agents.py @@ -1,3 +1,4 @@ +import pdb import pandas as pd import numpy as np from sklearn.preprocessing import MinMaxScaler @@ -202,6 +203,14 @@ def threshold_exp_response(self, points, day_of_week=None): def linear_response(self, points, day_of_week=None): points_effect = points * self.points_multiplier + if ( + np.isnan(points_effect).any() + or np.isneginf(points_effect).any() + or np.isposinf(points_effect).any() + ): + import pdb + + pdb.set_trace() output = self.routine_output_transform(points_effect, day_of_week=day_of_week) return output diff --git a/multiagent/reward.py b/multiagent/reward.py index f55b80527..97ee222ad 100644 --- a/multiagent/reward.py +++ b/multiagent/reward.py @@ -1,5 +1,3 @@ -import cvxpy as cvx - # import osqp import numpy as np from sklearn.preprocessing import MinMaxScaler @@ -35,6 +33,8 @@ def __init__(self, energy_use, prices, min_demand, max_demand): self.total_demand = np.sum(energy_use) def ideal_use_calculation(self): + import cvxpy as cvx + """ Computes an optimization of demand according to price
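# Editor's note (illustrative sketch, not part of the patch): ideal_use_calculation,
# whose cvxpy import is deferred into the function body in this last hunk, solves a
# small linear program -- minimize sum_t price[t]*demand[t] subject to
# sum_t demand[t] == total_demand and min_demand <= demand[t] <= max_demand -- i.e.
# it reallocates the same total consumption toward the cheapest hours to obtain the
# "ideal" demand profile that the distance-based rewards compare against. An
# equivalent sketch using scipy.optimize (assumed available) instead of cvxpy:
import numpy as np
from scipy.optimize import linprog

def ideal_use_sketch(prices, total_demand, min_demand, max_demand):
    T = len(prices)
    result = linprog(
        c=prices,                                # minimize price . demand
        A_eq=[[1.0] * T], b_eq=[total_demand],   # keep the day's total demand fixed
        bounds=[(min_demand, max_demand)] * T,   # per-hour demand bounds
        method="highs",
    )
    return np.asarray(result.x)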