diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py new file mode 100644 index 000000000..5d04e5886 --- /dev/null +++ b/multiagent/SG_environment.py @@ -0,0 +1,410 @@ +import gym +from gym import spaces +from gym.envs.registration import EnvSpec +import numpy as np +import pandas as pd +from multiagent.multi_discrete import MultiDiscrete + + +import random + +import tensorflow as tf + +# manan these are in the same directory -- you may need to add that to the path, but prob not +from multiagent.utils import price_signal, fourier_points_from_action +from multiagent.agents import DeterministicFunctionPerson +from multiagent.reward import Reward + +import pickle + +# environment for all agents in the multiagent world +# currently code assumes that no agents will be created/destroyed at runtime! +class MultiAgentEnv(gym.Env): + metadata = {"render.modes": ["human", "rgb_array"]} # ?? necc? + + def __init__( + self, + action_space_string="continuous", + response_type_string="l", + number_of_participants=10, + one_day=0, + energy_in_state=False, + yesterday_in_state=False, + day_of_week=False, + pricing_type="TOU", + reward_function="log_cost_regularized", + fourier_basis_size=10, + ): + + # self.world = world ## does world appear elsewhere + + self.number_of_participants = number_of_participants + self.response_type_string = response_type_string + self.action_space_string = action_space_string + + player_dict = self._create_agents() + self.agents = player_dict # or transform however needed # world.policy_agents + self.n = len(self.agents) + + self.discrete_action_space = False + self.discrete_action_input = False + self.force_discrete_action = False + + self.shared_reward = False ## right? + self.time = 0 + + self.curr_iter = 0 + + self.points_length = 10 + self.action_length = 10 + self.action_subspace = 3 + + self.action_space = [] + + # (our inputs) + self.prev_energy = np.zeros(10) + self.one_day = self._find_one_day(one_day) + self.energy_in_state = energy_in_state + self.yesterday_in_state = yesterday_in_state + self.reward_function = reward_function + self.fourier_basis_size = fourier_basis_size + + self.day = 0 + self.days_of_week = [0, 1, 2, 3, 4] + self.day_of_week_flag = day_of_week + self.day_of_week = self.days_of_week[self.day % 5] + + # Create Observation Space (aka State Space) + self.observation_space = [] + + if pricing_type == "TOU": + self.pricing_type = "time_of_use" + elif pricing_type == "RTP": + self.pricing_type = "real_time_pricing" + else: + print("Wrong pricing type") + raise ValueError + + self.prices = self._get_prices() + + ## back to filling in their skeleton + + for agent in self.agents: + # action seq isn't continuous, so skipping Multidiscrete action + self.action_space.append(self._create_action_space()) + + obs_dim = 10 + 10 * self.energy_in_state + 10 * self.yesterday_in_state + self.observation_space.append(self._create_observation_space()) + + def _find_one_day(self, one_day: int): + """ + Purpose: Helper function to find one_day to train on (if applicable) + + Args: + One_day: (Int) in range [-1,365] + + Returns: + 0 if one_day = 0 + one_day if one_day in range [1,365] + random_number(1,365) if one_day = -1 + """ + + print("one_day") + print(one_day) + + # if(one_day != -1): + # return np.random.randint(0, high=365) + + # else: + return one_day + + def _create_action_space(self): + """ + Purpose: Return action space of type specified by self.action_space_string + + Args: + None + + Returns: + Action Space for environment based on 
action_space_str + + Note: Multidiscrete refers to a 10-dim vector where each action {0,1,2} represents Low, Medium, High points respectively. + We pose this option to test whether simplifying the action-space helps the agent. + """ + + # Making a symmetric, continuous space to help learning for continuous control (suggested in StableBaselines doc.) + if self.action_space_string == "continuous": + return spaces.Box( + low=-1, high=1, shape=(self.action_length,), dtype=np.float32 + ) + + elif self.action_space_string == "multidiscrete": + discrete_space = [self.action_subspace] * self.action_length + return spaces.MultiDiscrete(discrete_space) + + elif self.action_space_string == "fourier": + return spaces.Box( + low=-2, + high=2, + shape=(2 * self.fourier_basis_size - 1,), + dtype=np.float32, + ) + + def _create_agents(self): + """ + Purpose: Create the participants of the social game. We create a game with n players, where n = self.number_of_participants + + Args: + None + + Returns: + agent_dict: Dictionary of players, each with response function based on self.response_type_string + + """ + + player_dict = {} + + # Sample Energy from average energy in the office (pre-treatment) from the last experiment + # Reference: Lucas Spangher, et al. Engineering vs. ambient typevisualizations: Quantifying effects of different data visualizations on energy consumption. 2019 + + sample_energy = np.array( + [ + 0.28, + 11.9, + 16.34, + 16.8, + 17.43, + 16.15, + 16.23, + 15.88, + 15.09, + 35.6, + 123.5, + 148.7, + 158.49, + 149.13, + 159.32, + 157.62, + 158.8, + 156.49, + 147.04, + 70.76, + 42.87, + 23.13, + 22.52, + 16.8, + ] + ) + + # only grab working hours (8am - 5pm) + working_hour_energy = sample_energy[8:18] + + my_baseline_energy = pd.DataFrame(data={"net_energy_use": working_hour_energy}) + for i in range(self.number_of_participants): + player = DeterministicFunctionPerson( + my_baseline_energy, + points_multiplier=10, + response=self.response_type_string, + ) # , ) + player_dict["player_{}".format(i)] = player + + return player_dict + + def _create_observation_space(self): + """ + Purpose: Returns the observation space. + If the state space includes yesterday, then it is +10 dim for yesterday's price signal + If the state space includes energy_in_state, then it is +10 dim for yesterday's energy + + Args: + None + + Returns: + Action Space for environment based on action_space_str + """ + + # TODO: Normalize obs_space ! 
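+        # A reading of the branches below (not a spec): the base observation is
+        # today's 10-hour price signal (10 dims); yesterday_in_state appends
+        # yesterday's 10-hour price signal (+10 dims); energy_in_state appends
+        # the agent's previous 10-hour energy profile (+10 dims). Hence the
+        # Box shape is 10, 20, or 30 depending on which flags are set.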
+ if self.yesterday_in_state: + if self.energy_in_state: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(30,), dtype=np.float32 + ) + else: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32 + ) + + else: + if self.energy_in_state: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32 + ) + else: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32 + ) + + def _get_prices(self): + """ + Purpose: Get grid price signals for the entire year (using past data from a building in Los Angeles as reference) + + Args: + None + + Returns: Array containing 365 price signals, where array[day_number] = grid_price for day_number from 8AM - 5PM + + """ + all_prices = [] + + type_of_DR = self.pricing_type + + if self.one_day != -1: + # If one_day we repeat the price signals from a fixed day + # Tweak One_Day Price Signal HERE + price = price_signal(self.one_day, type_of_DR=type_of_DR) + price = np.array(price[8:18]) + if np.mean(price) == price[2]: + price[3:6] += 0.3 + price = np.maximum(0.01 * np.ones_like(price), price) + + for i in range(365): + all_prices.append(price) + else: + day = 0 + for i in range(365): + price = price_signal(day + 1, type_of_DR=type_of_DR) + price = np.array(price[8:18]) + # put a floor on the prices so we don't have negative prices + if np.mean(price) == price[2]: + price[3:6] += 0.3 + price = np.maximum(0.01 * np.ones_like(price), price) + all_prices.append(price) + day += 1 + + return np.array(all_prices) + + def _simulate_human(self, action, agent): + """ + Purpose: Gets energy consumption from players given action from agent + + Args: + Action: 10-dim vector corresponding to action for each hour 8AM - 5PM + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ + + # Get players response to agent's actions + player = agent + + if self.day_of_week_flag: + player_energy = player.get_response(action, day_of_week=self.day_of_week) + else: + player_energy = player.get_response(action, day_of_week=None) + + # Calculate energy consumption by player and in total (over the office) + + return player_energy + + def step(self, action_n): + for i, action in enumerate(action_n): + action = np.asarray(action) + if self.action_space_string == "continuous": + action = np.clip(action, 0, 10) + action_n[i] = action + else: + raise Exception("Wrong action_space_string") + + print("--" * 10) + print("--" * 10) + print("clipped action") + print(action_n) + + obs_n = [] + reward_n = [] + done_n = [] + info_n = {"n": []} + + prev_price = self.prices[(self.day)] + self.day = (self.day + 1) % 365 + self.curr_iter += 1 + + if self.curr_iter > 0: ### could change + done = True + self.curr_iter = 0 + else: + done = False + + for i, agent in enumerate(self.agents.values()): + agent.prev_energy = self._simulate_human(action_n[i], agent) + obs_n.append(self._get_observation_per_agent(agent)) + reward_n.append( + self._get_reward_per_agent(agent, reward_function=self.reward_function) + ) + done_n.append(done) + info_n["n"].append( + {} + ) ## this was left as a formality in the stableBaselines, too + + return obs_n, reward_n, done_n, info_n + + def _get_observation_per_agent(self, agent): + prev_price = self.prices[(self.day - 1) % 365] + next_observation = self.prices[self.day] + + if self.yesterday_in_state: + if self.energy_in_state: + return np.concatenate( + (next_observation, np.concatenate((prev_price, agent.prev_energy))) + 
) + else: + return np.concatenate((next_observation, prev_price)) + + elif self.energy_in_state: + return np.concatenate((next_observation, agent.prev_energy)) + + else: + return next_observation + + def _get_reward_per_agent(self, agent, reward_function="scaled_cost_distance"): + """ + Purpose: Compute reward given price signal and energy consumption of the office + + Args: + agent: DeterministicFunctionPerson object, simulated office twerker + Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ + + price = self.prices[self.day] + # get the reward from the player's output + player_min_demand = agent.get_min_demand() + player_max_demand = agent.get_max_demand() + player_energy = agent.prev_energy + player_reward = Reward( + player_energy, price, player_min_demand, player_max_demand + ) + + if reward_function == "scaled_cost_distance": + player_ideal_demands = player_reward.ideal_use_calculation() + reward = player_reward.scaled_cost_distance(player_ideal_demands) + elif reward_function == "log_cost_regularized": + reward = player_reward.log_cost_regularized() + else: + raise Exception("reward_function not defined") + return reward + + def reset(self): + # reset world + obs_n = [] + for agent in self.agents.values(): + obs_n.append(self._get_observation_per_agent(agent)) + return obs_n + + def render(self, mode="human"): + pass + + def close(self): + pass diff --git a/multiagent/agents.py b/multiagent/agents.py new file mode 100644 index 000000000..173099157 --- /dev/null +++ b/multiagent/agents.py @@ -0,0 +1,356 @@ +import pdb +import pandas as pd +import numpy as np +from sklearn.preprocessing import MinMaxScaler + +#### file to make the simulation of people that we can work with + + +class Person: + """ Person (parent?) class -- will define how the person takes in a points signal and puts out an energy signal + baseline_energy = a list or dataframe of values. This is data from SinBerBEST + points_multiplier = an int which describes how sensitive each person is to points + + """ + + def __init__(self, baseline_energy_df, points_multiplier=1): + self.baseline_energy_df = baseline_energy_df + self.baseline_energy = np.array(self.baseline_energy_df["net_energy_use"]) + self.points_multiplier = points_multiplier + + baseline_min = self.baseline_energy.min() + baseline_max = self.baseline_energy.max() + baseline_range = baseline_max - baseline_min + + self.min_demand = np.maximum(0, baseline_min + baseline_range * 0.05) + self.max_demand = np.maximum(0, baseline_min + baseline_range * 0.95) + self.silent = True + self.prev_energy = self.baseline_energy # basically a placeholder. + + def energy_output_simple_linear(self, points): + """Determines the energy output of the person, based on the formula: + + y[n] = -sum_{rolling window of 5} points + baseline_energy + noise + + inputs: points - list or dataframe of points values. Assumes that the + list will be in the same time increment that energy_output will be. 
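+
+        Worked example (hypothetical numbers, not from the data): with
+        points_multiplier = 1 and a 5-step rolling mean of the points equal to
+        2 at hour t, the output is roughly baseline_energy[t] - 2 + 1 on
+        average, since the noise term np.random.normal(1) has mean 1.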
+ + For now, that's in 1 hour increments + + """ + points_df = pd.DataFrame(points) + + points_effect = points_df.rolling(window=5, min_periods=1).mean() + + time = points_effect.shape[0] + energy_output = [] + + for t in range(time): + temp_energy = ( + self.baseline_energy[t] + - points_effect.iloc[t] * self.points_multiplier + + np.random.normal(1) + ) + energy_output.append(temp_energy) + + return pd.DataFrame(energy_output) + + def pure_linear_signal(self, points, baseline_day=0): + """ + A linear person. The more points you give them, the less energy they will use + (within some bounds) for each hour. No rolling effects or anything. The simplest + signal. + """ + + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + return output + + def get_min_demand(self): + return self.min_demand + # return np.quantile(self.baseline_energy, .05) + + def get_max_demand(self): + return self.max_demand + # return np.quantile(self.baseline_energy, .95) + + +class FixedDemandPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1): + super().__init__(baseline_energy_df, points_multiplier) + + def demand_from_points(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand / np.sum(output)) + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + + return output + + def adverserial_linear(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + + points_effect = np.array(points * self.points_multiplier) + output = output + points_effect + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand / np.sum(output)) + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + + return output + + +class DeterministicFunctionPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1, response="t"): + super().__init__(baseline_energy_df, points_multiplier) + self.response = response + + def threshold_response_func(self, points): + points = np.array(points) * self.points_multiplier + threshold = np.mean(points) + return [p if p > threshold else 0 for p in points] + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p ** 2 for p in points] + + return points_effect + + def sin_response_func(self, points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i) * np.pi) * self.points_multiplier for i in points] + points = points + return points + + def 
linear_response_func(self, points): + return points * self.points_multiplier + + def routine_output_transform(self, points_effect, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output - points_effect + # output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + # output = np.maximum(output, self.min_demand) + # output = np.minimum(output, self.max_demand) + # return output + + if day_of_week != None: + energy_resp = energy_resp * self.day_of_week_multiplier[day_of_week] + + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + scaled_output = scaler.fit_transform(output.reshape(-1, 1)) + + return np.squeeze(scaled_output) + + def threshold_response(self, points, day_of_week=None): + points_effect = self.threshold_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def sin_response(self, points, day_of_week=None): + points_effect = self.sin_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def exp_response(self, points, day_of_week=None): + points_effect = self.exponential_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def threshold_exp_response(self, points, day_of_week=None): + points_effect = self.exponential_response_func(points) + points_effect = self.threshold_response_func(points_effect) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def linear_response(self, points, day_of_week=None): + points_effect = points * self.points_multiplier + if ( + np.isnan(points_effect).any() + or np.isneginf(points_effect).any() + or np.isposinf(points_effect).any() + ): + import pdb + + pdb.set_trace() + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def get_response(self, points, day_of_week=None): + if self.response == "t": + energy_resp = self.threshold_exp_response(points, day_of_week=day_of_week) + elif self.response == "s": + energy_resp = self.sin_response(points, day_of_week=day_of_week) + elif self.response == "l": + energy_resp = self.linear_response(points, day_of_week=day_of_week) + elif self.response == "m": + energy_resp = self.mixed_response(points, day_of_week=day_of_week) + else: + raise NotImplementedError + return energy_resp + + +class RandomizedFunctionPerson(DeterministicFunctionPerson): + def __init__( + self, + baseline_energy_df, + points_multiplier=1, + response="t", + low=0, + high=50, + distr="U", + ): + + """ + Adds Random Noise to DeterministicFunctionPerson energy output (for D.R. purposes) + + New Args: + Low = Lower bound for random noise added to energy use + High = Upper bound " " " " " " + Distr = 'G' for Gaussian noise, 'U' for Uniform random noise (Note: Continuous distr.) + + Note: For design purposes the random noise is updated at the end of each episode + """ + # TODO: Multivariate distr?? 
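+        # How the randomness enters (as currently written): update_noise()
+        # draws a single 10-dim noise vector -- N(mean=(low + high) / 2, std=10)
+        # for 'G', Uniform(low, high) for 'U' -- and each *_response_func adds
+        # that same vector to the points effect until update_noise() is called
+        # again.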
+ + super().__init__( + baseline_energy_df, points_multiplier=points_multiplier, response=response + ) + + distr = distr.upper() + assert distr in ["G", "U"] + + self.response = response + self.low = low + self.high = high if high < self.max_demand else 50 + self.distr = distr + + self.noise = [] + self.update_noise() + + def update_noise(self): + if self.distr == "G": + # TODO: Update how to sample from Gausian + self.noise = np.random.normal( + loc=(self.low + self.high) / 2, scale=10, size=10 + ) + + elif self.distr == "U": + self.noise = np.random.uniform(low=self.low, high=self.high, size=10) + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p ** 2 for p in points] + + return points_effect + self.noise + + def sin_response_func(self, points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i) * np.pi) * self.points_multiplier for i in points] + points = points + return points + self.noise + + def linear_response_func(self, points): + return points * self.points_multiplier + self.noise + + +# utkarsha's person + + +class CurtailandShiftPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1): + super().__init__(baseline_energy_df, points_multiplier) + self.shiftableLoadFraction = 0.1 + self.shiftByHours = 3 + self.curtailableLoadFraction = 0.1 + self.maxCurtailHours = ( + 3 # Person willing to curtail for no more than these hours + ) + + def shiftedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + points = np.array(points) * self.points_multiplier + shiftableLoad = self.shiftableLoadFraction * output + shiftByHours = self.shiftByHours + + # 10 hour day. 
Rearrange the sum of shiftableLoad into these hours by treating points as the 'price' at that hour + # Load can be shifted by a max of shiftByHours (default = 3 hours) + # For each hour, calculate the optimal hour to shift load to within +- 3 hours + shiftedLoad = np.zeros(10) + for hour in range(10): + candidatePrices = points[ + max(hour - shiftByHours, 0) : min(hour + shiftByHours, 9) + 1 + ] + shiftToHour = max(hour - shiftByHours, 0) + np.argmin(candidatePrices) + shiftedLoad[shiftToHour] += shiftableLoad[hour] + return shiftedLoad + + def curtailedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + points = np.array(points) * self.points_multiplier + curtailableLoad = self.curtailableLoadFraction * output + maxPriceHours = np.argsort(points)[0 : self.maxCurtailHours] + for hour in maxPriceHours: + curtailableLoad[hour] = 0 + return curtailableLoad + + def get_response(self, points, day_of_week=None): + baseline_day = 0 + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + energy_resp = ( + output * (1 - self.curtailableLoadFraction - self.shiftableLoadFraction) + + self.curtailedLoad(points) + + self.shiftedLoad(points) + ) + + self.min_demand = np.maximum(0, min(energy_resp)) + self.max_demand = np.maximum(0, max(energy_resp)) + + return energy_resp + diff --git a/multiagent/environment.py b/multiagent/environment.py index d2e8d3278..18bc3c996 100644 --- a/multiagent/environment.py +++ b/multiagent/environment.py @@ -7,13 +7,18 @@ # environment for all agents in the multiagent world # currently code assumes that no agents will be created/destroyed at runtime! class MultiAgentEnv(gym.Env): - metadata = { - 'render.modes' : ['human', 'rgb_array'] - } - - def __init__(self, world, reset_callback=None, reward_callback=None, - observation_callback=None, info_callback=None, - done_callback=None, shared_viewer=True): + metadata = {"render.modes": ["human", "rgb_array"]} + + def __init__( + self, + world, + reset_callback=None, + reward_callback=None, + observation_callback=None, + info_callback=None, + done_callback=None, + shared_viewer=True, + ): self.world = world self.agents = self.world.policy_agents @@ -30,9 +35,13 @@ def __init__(self, world, reset_callback=None, reward_callback=None, # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector self.discrete_action_input = False # if true, even the action is continuous, action will be performed discretely - self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False + self.force_discrete_action = ( + world.discrete_action if hasattr(world, "discrete_action") else False + ) # if true, every agent has the same reward - self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False + self.shared_reward = ( + world.collaborative if hasattr(world, "collaborative") else False + ) self.time = 0 # configure spaces @@ -44,21 +53,35 @@ def __init__(self, world, reset_callback=None, reward_callback=None, if self.discrete_action_space: u_action_space = spaces.Discrete(world.dim_p * 2 + 1) else: - u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32) + u_action_space = spaces.Box( + low=-agent.u_range, + high=+agent.u_range, + shape=(world.dim_p,), + dtype=np.float32, + ) if agent.movable: total_action_space.append(u_action_space) # communication action space if 
self.discrete_action_space: c_action_space = spaces.Discrete(world.dim_c) else: - c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32) + c_action_space = spaces.Box( + low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32 + ) if not agent.silent: total_action_space.append(c_action_space) # total action space if len(total_action_space) > 1: # all action spaces are discrete, so simplify to MultiDiscrete action space - if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): - act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space]) + if all( + [ + isinstance(act_space, spaces.Discrete) + for act_space in total_action_space + ] + ): + act_space = MultiDiscrete( + [[0, act_space.n - 1] for act_space in total_action_space] + ) else: act_space = spaces.Tuple(total_action_space) self.action_space.append(act_space) @@ -66,7 +89,11 @@ def __init__(self, world, reset_callback=None, reward_callback=None, self.action_space.append(total_action_space[0]) # observation space obs_dim = len(observation_callback(agent, self.world)) - self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)) + self.observation_space.append( + spaces.Box( + low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32 + ) + ) agent.action.c = np.zeros(self.world.dim_c) # rendering @@ -81,7 +108,7 @@ def step(self, action_n): obs_n = [] reward_n = [] done_n = [] - info_n = {'n': []} + info_n = {"n": []} self.agents = self.world.policy_agents # set action for each agent for i, agent in enumerate(self.agents): @@ -94,7 +121,7 @@ def step(self, action_n): reward_n.append(self._get_reward(agent)) done_n.append(self._get_done(agent)) - info_n['n'].append(self._get_info(agent)) + info_n["n"].append(self._get_info(agent)) # all agents get total reward in cooperative case reward = np.sum(reward_n) @@ -150,7 +177,7 @@ def _set_action(self, action, agent, action_space, time=None): size = action_space.high - action_space.low + 1 index = 0 for s in size: - act.append(action[index:(index+s)]) + act.append(action[index : (index + s)]) index += s action = act else: @@ -161,10 +188,14 @@ def _set_action(self, action, agent, action_space, time=None): if self.discrete_action_input: agent.action.u = np.zeros(self.world.dim_p) # process discrete action - if action[0] == 1: agent.action.u[0] = -1.0 - if action[0] == 2: agent.action.u[0] = +1.0 - if action[0] == 3: agent.action.u[1] = -1.0 - if action[0] == 4: agent.action.u[1] = +1.0 + if action[0] == 1: + agent.action.u[0] = -1.0 + if action[0] == 2: + agent.action.u[0] = +1.0 + if action[0] == 3: + agent.action.u[1] = -1.0 + if action[0] == 4: + agent.action.u[1] = +1.0 else: if self.force_discrete_action: d = np.argmax(action[0]) @@ -197,40 +228,43 @@ def _reset_render(self): self.render_geoms_xform = None # render environment - def render(self, mode='human'): - if mode == 'human': - alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - message = '' + def render(self, mode="human"): + if mode == "human": + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + message = "" for agent in self.world.agents: comm = [] for other in self.world.agents: - if other is agent: continue + if other is agent: + continue if np.all(other.state.c == 0): - word = '_' + word = "_" else: word = alphabet[np.argmax(other.state.c)] - message += (other.name + ' to ' + agent.name + ': ' + word + ' ') + message += other.name + " to " + agent.name + ": " + word + " " print(message) for i in 
range(len(self.viewers)): # create viewers (if necessary) if self.viewers[i] is None: # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering + # from gym.envs.classic_control import rendering from multiagent import rendering - self.viewers[i] = rendering.Viewer(700,700) + + self.viewers[i] = rendering.Viewer(700, 700) # create rendering geometry if self.render_geoms is None: # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering + # from gym.envs.classic_control import rendering from multiagent import rendering + self.render_geoms = [] self.render_geoms_xform = [] for entity in self.world.entities: geom = rendering.make_circle(entity.size) xform = rendering.Transform() - if 'agent' in entity.name: + if "agent" in entity.name: geom.set_color(*entity.color, alpha=0.5) else: geom.set_color(*entity.color) @@ -247,49 +281,52 @@ def render(self, mode='human'): results = [] for i in range(len(self.viewers)): from multiagent import rendering + # update bounds to center around agent cam_range = 1 if self.shared_viewer: pos = np.zeros(self.world.dim_p) else: pos = self.agents[i].state.p_pos - self.viewers[i].set_bounds(pos[0]-cam_range,pos[0]+cam_range,pos[1]-cam_range,pos[1]+cam_range) + self.viewers[i].set_bounds( + pos[0] - cam_range, + pos[0] + cam_range, + pos[1] - cam_range, + pos[1] + cam_range, + ) # update geometry positions for e, entity in enumerate(self.world.entities): self.render_geoms_xform[e].set_translation(*entity.state.p_pos) # render to display or array - results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) + results.append(self.viewers[i].render(return_rgb_array=mode == "rgb_array")) return results # create receptor field locations in local coordinate frame def _make_receptor_locations(self, agent): - receptor_type = 'polar' + receptor_type = "polar" range_min = 0.05 * 2.0 range_max = 1.00 dx = [] # circular receptive field - if receptor_type == 'polar': + if receptor_type == "polar": for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False): for distance in np.linspace(range_min, range_max, 3): dx.append(distance * np.array([np.cos(angle), np.sin(angle)])) # add origin dx.append(np.array([0.0, 0.0])) # grid receptive field - if receptor_type == 'grid': + if receptor_type == "grid": for x in np.linspace(-range_max, +range_max, 5): for y in np.linspace(-range_max, +range_max, 5): - dx.append(np.array([x,y])) + dx.append(np.array([x, y])) return dx # vectorized wrapper for a batch of multi-agent environments # assumes all environments have the same observation and action space class BatchMultiAgentEnv(gym.Env): - metadata = { - 'runtime.vectorized': True, - 'render.modes' : ['human', 'rgb_array'] - } + metadata = {"runtime.vectorized": True, "render.modes": ["human", "rgb_array"]} def __init__(self, env_batch): self.env_batch = env_batch @@ -310,10 +347,10 @@ def step(self, action_n, time): obs_n = [] reward_n = [] done_n = [] - info_n = {'n': []} + info_n = {"n": []} i = 0 for env in self.env_batch: - obs, reward, done, _ = env.step(action_n[i:(i+env.n)], time) + obs, reward, done, _ = env.step(action_n[i : (i + env.n)], time) i += env.n obs_n += obs # reward = [r / len(self.env_batch) for r in reward] @@ -328,7 +365,7 @@ def reset(self): return obs_n # render environment - def render(self, mode='human', close=True): + def render(self, mode="human", close=True): results_n = [] for env in self.env_batch: 
results_n += env.render(mode, close) diff --git a/multiagent/reward.py b/multiagent/reward.py new file mode 100644 index 000000000..97ee222ad --- /dev/null +++ b/multiagent/reward.py @@ -0,0 +1,203 @@ +# import osqp +import numpy as np +from sklearn.preprocessing import MinMaxScaler + +#### file to calculate the rewards. Meant to be modular: +#### class Rewards should have several different functions by Dec 2019 + + +class Reward: + def __init__(self, energy_use, prices, min_demand, max_demand): + """ + Args: + energy_use: list returned by Person class signifying energy use + prices: list returned by grid signifying cost throughout day + min_demand: value computed by Person class signifying minimum energy use long term + max_demand: value computed by Person class signifying maximum energy use long term + """ + + self.energy_use = np.array(energy_use) + self.prices = np.array(prices) + self._num_timesteps = energy_use.shape[0] + self.min_demand = np.min(energy_use) # min_demand + self.max_demand = np.max(energy_use) # max_demand + self.baseline_max_demand = 159.32 + + assert round(self.max_demand) == round( + max_demand + ), "The max demand that the player is using and the optimization is using is not the same" + assert round(self.min_demand) == round( + min_demand + ), "The min demand that the player is using and the optimization is using is not the same" + + self.total_demand = np.sum(energy_use) + + def ideal_use_calculation(self): + import cvxpy as cvx + + """ + Computes an optimization of demand according to price + + returns: np.array of ideal energy demands given a price signal + """ + + demands = cvx.Variable(self._num_timesteps) + min_demand = cvx.Parameter() + max_demand = cvx.Parameter() + total_demand = cvx.Parameter() + prices = cvx.Parameter(self._num_timesteps) + + min_demand = self.min_demand + max_demand = self.max_demand + total_demand = self.total_demand + + while max_demand * 10 < total_demand: + print("multiplying demand to make optimization work") + print("max_demand: " + str(max_demand)) + print("total_demand: " + str(total_demand)) + print("energy_use") + print(self.energy_use) + max_demand *= 1.1 + + prices = self.prices + constraints = [cvx.sum(demands, axis=0, keepdims=True) == total_demand] + # constraints = [np.ones(self._num_timesteps).T * demands == total_demand] + for i in range(self._num_timesteps): + constraints += [demands[i] <= max_demand] + constraints += [min_demand <= demands[i]] + # if i != 0:y + # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] + + objective = cvx.Minimize(demands.T * prices) + problem = cvx.Problem(objective, constraints) + + problem.solve(solver=cvx.ECOS, verbose=False) + return np.array(demands.value) + + def log_cost(self): + """ + Scales energy_use to be between min and max energy demands (this is repeated + in agent.routine_output_trasform), and then returns the simple total cost. + + """ + + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + + return -np.log(np.dot(scaled_energy, self.prices)) + + def log_cost_regularized(self): + """ + Scales energy_use to be between min and max energy demands (this is repeated + in agent.routine_output_trasform), and then returns the simple total cost. + + :param: h - the hyperparameter that modifies the penalty on energy demand that's driven too low. + + """ + + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + ## ? 
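+        # Reading of the return value below: -log(scaled_energy . prices)
+        # rewards cheaper consumption profiles, and the second term subtracts a
+        # flat 10 whenever total daily use drops below 10 * (0.5 *
+        # baseline_max_demand), i.e. ten hours at half the 159.32 kWh baseline
+        # peak, discouraging the degenerate "use nothing" strategy.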
+ scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + + return -np.log(np.dot(scaled_energy, self.prices)) - 10 * ( + np.sum(self.energy_use) < (10 * (0.5 * self.baseline_max_demand)) + ) # - + # 10 * ()) + # sigmoid between 10 and 20 so there's a smooth transition + # - lambd * (difference b/w energy(t)) - put a bound on + # play with the lipschitz constant + # [10, 20, 10, 10, 10, ] + + def neg_distance_from_ideal(self, demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a numerical distance metric, negated + """ + + return -((demands - self.energy_use) ** 2).sum() + + def cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a cost-based distance metric, negated + """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + return cost_difference + + def log_cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + the log of the cost distance + """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # TODO ENSURE THAT COST DIFFERENCE IS < 0 + if cost_difference < 0: + return -np.log(-cost_difference) + else: + print( + "WEIRD REWARD ALERT. IDEAL COST >= CURRENT COST. returning reward of 10" + ) + return 10 + + def scaled_cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a cost-based distance metric normalized by total ideal cost + """ + + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # print("--" * 10) + # print("ideal cost") + # print(ideal_cost) + # print("--" * 10) + # print("prices") + # print(self.prices) + # print("--" * 10) + # print("current_cost") + # print(current_cost) + + if cost_difference > 0 or ideal_cost < 0: + print("--" * 10) + print("Problem with reward") + # print("min_demand: " + str(self.min_demand)) + # print("max_demand: " + str(self.max_demand)) + # print("--" * 10) + print("prices") + print(self.prices) + # print("current_cost") + # print(current_cost) + # print("--" * 10) + # print("ideal_cost") + # print(ideal_cost) + # print("ideal_demands") + # print(ideal_demands) + # print("energy demand") + # print(self.energy_use) + print("taking the neg abs value so that it stays the same sign.") + + return -np.abs(cost_difference / ideal_cost) + diff --git a/multiagent/scenarios/simple_push.py b/multiagent/scenarios/simple_push.py index 907fef5b5..733535860 100644 --- a/multiagent/scenarios/simple_push.py +++ b/multiagent/scenarios/simple_push.py @@ -2,18 +2,19 @@ from multiagent.core import World, Agent, Landmark from multiagent.scenario import BaseScenario + class Scenario(BaseScenario): def make_world(self): world = World() # set any world properties first world.dim_c = 2 - num_agents = 2 + num_agents = 2 # same as dim_c num_adversaries = 1 num_landmarks = 2 # add agents world.agents = [Agent() for i in range(num_agents)] for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i + agent.name = "agent %d" % i agent.collide = True agent.silent = True if i < num_adversaries: @@ -23,7 +24,7 @@ def make_world(self): # add landmarks world.landmarks = 
[Landmark() for i in range(num_landmarks)] for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i + landmark.name = "landmark %d" % i landmark.collide = False landmark.movable = False # make initial conditions @@ -57,7 +58,11 @@ def reset_world(self, world): def reward(self, agent, world): # Agents are rewarded based on minimum agent distance to each landmark - return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) + return ( + self.adversary_reward(agent, world) + if agent.adversary + else self.agent_reward(agent, world) + ) def agent_reward(self, agent, world): # the distance to the goal @@ -65,14 +70,20 @@ def agent_reward(self, agent, world): def adversary_reward(self, agent, world): # keep the nearest good agents away from the goal - agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary] + agent_dist = [ + np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) + for a in world.agents + if not a.adversary + ] pos_rew = min(agent_dist) - #nearest_agent = world.good_agents[np.argmin(agent_dist)] - #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) - neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos))) - #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) + # nearest_agent = world.good_agents[np.argmin(agent_dist)] + # neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) + neg_rew = np.sqrt( + np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos)) + ) + # neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) return pos_rew - neg_rew - + def observation(self, agent, world): # get positions of all entities in this agent's reference frame entity_pos = [] @@ -86,11 +97,19 @@ def observation(self, agent, world): comm = [] other_pos = [] for other in world.agents: - if other is agent: continue + if other is agent: + continue comm.append(other.state.c) other_pos.append(other.state.p_pos - agent.state.p_pos) if not agent.adversary: - return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos) + return np.concatenate( + [agent.state.p_vel] + + [agent.goal_a.state.p_pos - agent.state.p_pos] + + [agent.color] + + entity_pos + + entity_color + + other_pos + ) else: - #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network + # other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network return np.concatenate([agent.state.p_vel] + entity_pos + other_pos) diff --git a/multiagent/utils.py b/multiagent/utils.py new file mode 100644 index 000000000..1cb1d8877 --- /dev/null +++ b/multiagent/utils.py @@ -0,0 +1,116 @@ +import csv +import numpy as np +import pandas as pd +from scipy.optimize import minimize +import os +import matplotlib.pyplot as plt +from sklearn.preprocessing import MinMaxScaler +import os + +# data_path = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "building_data.csv") +# csv_path = os.path.dirname(os.path.realpath(__file__)) + "/building_data.csv" + + +def price_signal(day=45, type_of_DR="real_time_pricing"): + + """ + Utkarsha's work on price signal from a building with 
demand and solar + Input: Day = an int signifying a 24 hour period. 365 total, all of 2012, start at 1. + Output: netdemand_price, a measure of how expensive energy is at each time in the day + optionally, we can return the optimized demand, which is the building + calculating where the net demand should be allocated + """ + csv_path = "building_data.csv" + csv_path_2 = "../gym-socialgame/gym_socialgame/envs/building_data.csv" + current_dir = os.path.dirname(os.path.realpath(__file__)) + csv_path_3 = os.path.join( + current_dir, + "../../transactive_control/gym-socialgame/gym_socialgame/envs/building_data.csv", + ) + try: + df = pd.read_csv(csv_path) + except: + try: + df = pd.read_csv(csv_path_2) + except: + df = pd.read_csv(csv_path_3) + + pv = 0.001 * np.array(df["PV (W)"].tolist()) + price = np.array(df["Price( $ per kWh)"].tolist()) + demand = np.nan_to_num(np.array(df["Office_Elizabeth (kWh)"].tolist())) + demand_charge = 10 / 30 # 10$/kW per month + + pvsize = 0 # Assumption + netdemand = demand - pvsize * pv + + # Data starts at 5 am on Jan 1 + netdemand_24 = netdemand[24 * day - 5 : 24 * day + 19] + price_24 = price[24 * day - 5 : 24 * day + 19] + pv_24 = pv[24 * day - 5 : 24 * day + 19] + demand_24 = demand[24 * day - 5 : 24 * day + 19] + + # Calculate optimal load scheduling. 90% of load is fixed, 10% is controllable. + def optimise_24h(netdemand_24, price_24): + currentcost = netdemand_24 @ price_24 + + fixed_load = 0.9 * netdemand_24 + controllable_load = sum(0.1 * netdemand_24) + + def objective(x): + load = fixed_load + x + cost = np.multiply(price_24, load) + # Negative demand means zero cost, not negative cost + # Adding L1 regularisation to penalise shifting of occupant demand + lambd = 0.005 + # Demand charge: attached to peak demand + return ( + sum(np.maximum(cost, 0)) + + demand_charge * max(load) + + lambd * sum(abs(x - 0.1 * netdemand_24)) + ) + + def constraint_sumofx(x): + return sum(x) - controllable_load + + def constraint_x_positive(x): + return x + + x0 = np.zeros(24) + cons = [ + {"type": "eq", "fun": constraint_sumofx}, + {"type": "ineq", "fun": constraint_x_positive}, + ] + sol = minimize(objective, x0, constraints=cons) + return sol + + if type_of_DR == "real_time_pricing": + sol = optimise_24h(netdemand_24, price_24) + x = sol["x"] + diff = x - 0.1 * netdemand_24 + return -diff - min(-diff) + + elif type_of_DR == "time_of_use": + if np.mean(price_24[8:18]) == price_24[9]: + price_24[13:16] += 3 + return price_24 + else: + return "error!!!" + + +def fourier_points_from_action(action, points_length, fourier_basis_size): + assert ( + fourier_basis_size == (action.size + 1) // 2 + ), "Incorrect fourier basis size for actions" + root_points = (action[0] / 2) * np.ones(points_length) + inp = np.linspace(0, 1, points_length) + for k in range(1, fourier_basis_size): + ak, bk = action[2 * k - 1], action[2 * k] + root_points += ak * np.sin(2 * np.pi * k * inp) + bk * np.cos( + 2 * np.pi * k * inp + ) + + points = root_points ** 2 + # TODO: More elegant solution than clipping + points = np.clip(points, 0, 10) + return points +
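+
+
+# Rough usage sketch for fourier_points_from_action (illustrative numbers, not
+# part of the training code): a "fourier" action of size
+# 2 * fourier_basis_size - 1 is mapped to a non-negative 10-dim points vector.
+#
+#   >>> action = np.array([1.0, 0.5, -0.5, 0.2, 0.1])  # fourier_basis_size = 3
+#   >>> points = fourier_points_from_action(action, points_length=10,
+#   ...                                      fourier_basis_size=3)
+#   >>> points.shape
+#   (10,)
+#   >>> bool(np.all((points >= 0) & (points <= 10)))
+#   True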