diff --git a/multiagent/SG_environment.py b/multiagent/SG_environment.py new file mode 100644 index 000000000..5d04e5886 --- /dev/null +++ b/multiagent/SG_environment.py @@ -0,0 +1,410 @@ +import gym +from gym import spaces +from gym.envs.registration import EnvSpec +import numpy as np +import pandas as pd +from multiagent.multi_discrete import MultiDiscrete + + +import random + +import tensorflow as tf + +# manan these are in the same directory -- you may need to add that to the path, but prob not +from multiagent.utils import price_signal, fourier_points_from_action +from multiagent.agents import DeterministicFunctionPerson +from multiagent.reward import Reward + +import pickle + +# environment for all agents in the multiagent world +# currently code assumes that no agents will be created/destroyed at runtime! +class MultiAgentEnv(gym.Env): + metadata = {"render.modes": ["human", "rgb_array"]} # ?? necc? + + def __init__( + self, + action_space_string="continuous", + response_type_string="l", + number_of_participants=10, + one_day=0, + energy_in_state=False, + yesterday_in_state=False, + day_of_week=False, + pricing_type="TOU", + reward_function="log_cost_regularized", + fourier_basis_size=10, + ): + + # self.world = world ## does world appear elsewhere + + self.number_of_participants = number_of_participants + self.response_type_string = response_type_string + self.action_space_string = action_space_string + + player_dict = self._create_agents() + self.agents = player_dict # or transform however needed # world.policy_agents + self.n = len(self.agents) + + self.discrete_action_space = False + self.discrete_action_input = False + self.force_discrete_action = False + + self.shared_reward = False ## right? + self.time = 0 + + self.curr_iter = 0 + + self.points_length = 10 + self.action_length = 10 + self.action_subspace = 3 + + self.action_space = [] + + # (our inputs) + self.prev_energy = np.zeros(10) + self.one_day = self._find_one_day(one_day) + self.energy_in_state = energy_in_state + self.yesterday_in_state = yesterday_in_state + self.reward_function = reward_function + self.fourier_basis_size = fourier_basis_size + + self.day = 0 + self.days_of_week = [0, 1, 2, 3, 4] + self.day_of_week_flag = day_of_week + self.day_of_week = self.days_of_week[self.day % 5] + + # Create Observation Space (aka State Space) + self.observation_space = [] + + if pricing_type == "TOU": + self.pricing_type = "time_of_use" + elif pricing_type == "RTP": + self.pricing_type = "real_time_pricing" + else: + print("Wrong pricing type") + raise ValueError + + self.prices = self._get_prices() + + ## back to filling in their skeleton + + for agent in self.agents: + # action seq isn't continuous, so skipping Multidiscrete action + self.action_space.append(self._create_action_space()) + + obs_dim = 10 + 10 * self.energy_in_state + 10 * self.yesterday_in_state + self.observation_space.append(self._create_observation_space()) + + def _find_one_day(self, one_day: int): + """ + Purpose: Helper function to find one_day to train on (if applicable) + + Args: + One_day: (Int) in range [-1,365] + + Returns: + 0 if one_day = 0 + one_day if one_day in range [1,365] + random_number(1,365) if one_day = -1 + """ + + print("one_day") + print(one_day) + + # if(one_day != -1): + # return np.random.randint(0, high=365) + + # else: + return one_day + + def _create_action_space(self): + """ + Purpose: Return action space of type specified by self.action_space_string + + Args: + None + + Returns: + Action Space for environment based on 
action_space_str + + Note: Multidiscrete refers to a 10-dim vector where each action {0,1,2} represents Low, Medium, High points respectively. + We pose this option to test whether simplifying the action-space helps the agent. + """ + + # Making a symmetric, continuous space to help learning for continuous control (suggested in StableBaselines doc.) + if self.action_space_string == "continuous": + return spaces.Box( + low=-1, high=1, shape=(self.action_length,), dtype=np.float32 + ) + + elif self.action_space_string == "multidiscrete": + discrete_space = [self.action_subspace] * self.action_length + return spaces.MultiDiscrete(discrete_space) + + elif self.action_space_string == "fourier": + return spaces.Box( + low=-2, + high=2, + shape=(2 * self.fourier_basis_size - 1,), + dtype=np.float32, + ) + + def _create_agents(self): + """ + Purpose: Create the participants of the social game. We create a game with n players, where n = self.number_of_participants + + Args: + None + + Returns: + agent_dict: Dictionary of players, each with response function based on self.response_type_string + + """ + + player_dict = {} + + # Sample Energy from average energy in the office (pre-treatment) from the last experiment + # Reference: Lucas Spangher, et al. Engineering vs. ambient typevisualizations: Quantifying effects of different data visualizations on energy consumption. 2019 + + sample_energy = np.array( + [ + 0.28, + 11.9, + 16.34, + 16.8, + 17.43, + 16.15, + 16.23, + 15.88, + 15.09, + 35.6, + 123.5, + 148.7, + 158.49, + 149.13, + 159.32, + 157.62, + 158.8, + 156.49, + 147.04, + 70.76, + 42.87, + 23.13, + 22.52, + 16.8, + ] + ) + + # only grab working hours (8am - 5pm) + working_hour_energy = sample_energy[8:18] + + my_baseline_energy = pd.DataFrame(data={"net_energy_use": working_hour_energy}) + for i in range(self.number_of_participants): + player = DeterministicFunctionPerson( + my_baseline_energy, + points_multiplier=10, + response=self.response_type_string, + ) # , ) + player_dict["player_{}".format(i)] = player + + return player_dict + + def _create_observation_space(self): + """ + Purpose: Returns the observation space. + If the state space includes yesterday, then it is +10 dim for yesterday's price signal + If the state space includes energy_in_state, then it is +10 dim for yesterday's energy + + Args: + None + + Returns: + Action Space for environment based on action_space_str + """ + + # TODO: Normalize obs_space ! 
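+        # A reading of the branches below (not a spec): the base observation is
+        # today's 10-hour price signal (10 dims); yesterday_in_state appends
+        # yesterday's 10-hour price signal (+10 dims); energy_in_state appends
+        # the agent's previous 10-hour energy profile (+10 dims). Hence the
+        # Box shape is 10, 20, or 30 depending on which flags are set.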
+ if self.yesterday_in_state: + if self.energy_in_state: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(30,), dtype=np.float32 + ) + else: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32 + ) + + else: + if self.energy_in_state: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(20,), dtype=np.float32 + ) + else: + return spaces.Box( + low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32 + ) + + def _get_prices(self): + """ + Purpose: Get grid price signals for the entire year (using past data from a building in Los Angeles as reference) + + Args: + None + + Returns: Array containing 365 price signals, where array[day_number] = grid_price for day_number from 8AM - 5PM + + """ + all_prices = [] + + type_of_DR = self.pricing_type + + if self.one_day != -1: + # If one_day we repeat the price signals from a fixed day + # Tweak One_Day Price Signal HERE + price = price_signal(self.one_day, type_of_DR=type_of_DR) + price = np.array(price[8:18]) + if np.mean(price) == price[2]: + price[3:6] += 0.3 + price = np.maximum(0.01 * np.ones_like(price), price) + + for i in range(365): + all_prices.append(price) + else: + day = 0 + for i in range(365): + price = price_signal(day + 1, type_of_DR=type_of_DR) + price = np.array(price[8:18]) + # put a floor on the prices so we don't have negative prices + if np.mean(price) == price[2]: + price[3:6] += 0.3 + price = np.maximum(0.01 * np.ones_like(price), price) + all_prices.append(price) + day += 1 + + return np.array(all_prices) + + def _simulate_human(self, action, agent): + """ + Purpose: Gets energy consumption from players given action from agent + + Args: + Action: 10-dim vector corresponding to action for each hour 8AM - 5PM + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ + + # Get players response to agent's actions + player = agent + + if self.day_of_week_flag: + player_energy = player.get_response(action, day_of_week=self.day_of_week) + else: + player_energy = player.get_response(action, day_of_week=None) + + # Calculate energy consumption by player and in total (over the office) + + return player_energy + + def step(self, action_n): + for i, action in enumerate(action_n): + action = np.asarray(action) + if self.action_space_string == "continuous": + action = np.clip(action, 0, 10) + action_n[i] = action + else: + raise Exception("Wrong action_space_string") + + print("--" * 10) + print("--" * 10) + print("clipped action") + print(action_n) + + obs_n = [] + reward_n = [] + done_n = [] + info_n = {"n": []} + + prev_price = self.prices[(self.day)] + self.day = (self.day + 1) % 365 + self.curr_iter += 1 + + if self.curr_iter > 0: ### could change + done = True + self.curr_iter = 0 + else: + done = False + + for i, agent in enumerate(self.agents.values()): + agent.prev_energy = self._simulate_human(action_n[i], agent) + obs_n.append(self._get_observation_per_agent(agent)) + reward_n.append( + self._get_reward_per_agent(agent, reward_function=self.reward_function) + ) + done_n.append(done) + info_n["n"].append( + {} + ) ## this was left as a formality in the stableBaselines, too + + return obs_n, reward_n, done_n, info_n + + def _get_observation_per_agent(self, agent): + prev_price = self.prices[(self.day - 1) % 365] + next_observation = self.prices[self.day] + + if self.yesterday_in_state: + if self.energy_in_state: + return np.concatenate( + (next_observation, np.concatenate((prev_price, agent.prev_energy))) + 
) + else: + return np.concatenate((next_observation, prev_price)) + + elif self.energy_in_state: + return np.concatenate((next_observation, agent.prev_energy)) + + else: + return next_observation + + def _get_reward_per_agent(self, agent, reward_function="scaled_cost_distance"): + """ + Purpose: Compute reward given price signal and energy consumption of the office + + Args: + agent: DeterministicFunctionPerson object, simulated office twerker + Energy_consumption: Dictionary containing energy usage by player in the office and the average office energy usage + + Returns: + Energy_consumption: Dictionary containing the energy usage by player and the average energy used in the office (key = "avg") + """ + + price = self.prices[self.day] + # get the reward from the player's output + player_min_demand = agent.get_min_demand() + player_max_demand = agent.get_max_demand() + player_energy = agent.prev_energy + player_reward = Reward( + player_energy, price, player_min_demand, player_max_demand + ) + + if reward_function == "scaled_cost_distance": + player_ideal_demands = player_reward.ideal_use_calculation() + reward = player_reward.scaled_cost_distance(player_ideal_demands) + elif reward_function == "log_cost_regularized": + reward = player_reward.log_cost_regularized() + else: + raise Exception("reward_function not defined") + return reward + + def reset(self): + # reset world + obs_n = [] + for agent in self.agents.values(): + obs_n.append(self._get_observation_per_agent(agent)) + return obs_n + + def render(self, mode="human"): + pass + + def close(self): + pass diff --git a/multiagent/agents.py b/multiagent/agents.py new file mode 100644 index 000000000..173099157 --- /dev/null +++ b/multiagent/agents.py @@ -0,0 +1,356 @@ +import pdb +import pandas as pd +import numpy as np +from sklearn.preprocessing import MinMaxScaler + +#### file to make the simulation of people that we can work with + + +class Person: + """ Person (parent?) class -- will define how the person takes in a points signal and puts out an energy signal + baseline_energy = a list or dataframe of values. This is data from SinBerBEST + points_multiplier = an int which describes how sensitive each person is to points + + """ + + def __init__(self, baseline_energy_df, points_multiplier=1): + self.baseline_energy_df = baseline_energy_df + self.baseline_energy = np.array(self.baseline_energy_df["net_energy_use"]) + self.points_multiplier = points_multiplier + + baseline_min = self.baseline_energy.min() + baseline_max = self.baseline_energy.max() + baseline_range = baseline_max - baseline_min + + self.min_demand = np.maximum(0, baseline_min + baseline_range * 0.05) + self.max_demand = np.maximum(0, baseline_min + baseline_range * 0.95) + self.silent = True + self.prev_energy = self.baseline_energy # basically a placeholder. + + def energy_output_simple_linear(self, points): + """Determines the energy output of the person, based on the formula: + + y[n] = -sum_{rolling window of 5} points + baseline_energy + noise + + inputs: points - list or dataframe of points values. Assumes that the + list will be in the same time increment that energy_output will be. 
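+
+        Worked example (hypothetical numbers, not from the data): with
+        points_multiplier = 1 and a 5-step rolling mean of the points equal to
+        2 at hour t, the output is roughly baseline_energy[t] - 2 + 1 on
+        average, since the noise term np.random.normal(1) has mean 1.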
+ + For now, that's in 1 hour increments + + """ + points_df = pd.DataFrame(points) + + points_effect = points_df.rolling(window=5, min_periods=1).mean() + + time = points_effect.shape[0] + energy_output = [] + + for t in range(time): + temp_energy = ( + self.baseline_energy[t] + - points_effect.iloc[t] * self.points_multiplier + + np.random.normal(1) + ) + energy_output.append(temp_energy) + + return pd.DataFrame(energy_output) + + def pure_linear_signal(self, points, baseline_day=0): + """ + A linear person. The more points you give them, the less energy they will use + (within some bounds) for each hour. No rolling effects or anything. The simplest + signal. + """ + + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + return output + + def get_min_demand(self): + return self.min_demand + # return np.quantile(self.baseline_energy, .05) + + def get_max_demand(self): + return self.max_demand + # return np.quantile(self.baseline_energy, .95) + + +class FixedDemandPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1): + super().__init__(baseline_energy_df, points_multiplier) + + def demand_from_points(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + + points_effect = np.array(points * self.points_multiplier) + output = output - points_effect + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand / np.sum(output)) + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + + return output + + def adverserial_linear(self, points, baseline_day=0): + # hack here to always grab the first day from the baseline_energy + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + + points_effect = np.array(points * self.points_multiplier) + output = output + points_effect + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output * (total_demand / np.sum(output)) + + # impose bounds/constraints + output = np.maximum(output, self.min_demand) + output = np.minimum(output, self.max_demand) + + return output + + +class DeterministicFunctionPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1, response="t"): + super().__init__(baseline_energy_df, points_multiplier) + self.response = response + + def threshold_response_func(self, points): + points = np.array(points) * self.points_multiplier + threshold = np.mean(points) + return [p if p > threshold else 0 for p in points] + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p ** 2 for p in points] + + return points_effect + + def sin_response_func(self, points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i) * np.pi) * self.points_multiplier for i in points] + points = points + return points + + def 
linear_response_func(self, points): + return points * self.points_multiplier + + def routine_output_transform(self, points_effect, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + total_demand = np.sum(output) + + # scale to keep total_demand (almost) constant + # almost bc imposing bounds afterwards + output = output - points_effect + # output = output * (total_demand/np.sum(output)) + + # impose bounds/constraints + # output = np.maximum(output, self.min_demand) + # output = np.minimum(output, self.max_demand) + # return output + + if day_of_week != None: + energy_resp = energy_resp * self.day_of_week_multiplier[day_of_week] + + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + scaled_output = scaler.fit_transform(output.reshape(-1, 1)) + + return np.squeeze(scaled_output) + + def threshold_response(self, points, day_of_week=None): + points_effect = self.threshold_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def sin_response(self, points, day_of_week=None): + points_effect = self.sin_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def exp_response(self, points, day_of_week=None): + points_effect = self.exponential_response_func(points) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def threshold_exp_response(self, points, day_of_week=None): + points_effect = self.exponential_response_func(points) + points_effect = self.threshold_response_func(points_effect) + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def linear_response(self, points, day_of_week=None): + points_effect = points * self.points_multiplier + if ( + np.isnan(points_effect).any() + or np.isneginf(points_effect).any() + or np.isposinf(points_effect).any() + ): + import pdb + + pdb.set_trace() + output = self.routine_output_transform(points_effect, day_of_week=day_of_week) + return output + + def get_response(self, points, day_of_week=None): + if self.response == "t": + energy_resp = self.threshold_exp_response(points, day_of_week=day_of_week) + elif self.response == "s": + energy_resp = self.sin_response(points, day_of_week=day_of_week) + elif self.response == "l": + energy_resp = self.linear_response(points, day_of_week=day_of_week) + elif self.response == "m": + energy_resp = self.mixed_response(points, day_of_week=day_of_week) + else: + raise NotImplementedError + return energy_resp + + +class RandomizedFunctionPerson(DeterministicFunctionPerson): + def __init__( + self, + baseline_energy_df, + points_multiplier=1, + response="t", + low=0, + high=50, + distr="U", + ): + + """ + Adds Random Noise to DeterministicFunctionPerson energy output (for D.R. purposes) + + New Args: + Low = Lower bound for random noise added to energy use + High = Upper bound " " " " " " + Distr = 'G' for Gaussian noise, 'U' for Uniform random noise (Note: Continuous distr.) + + Note: For design purposes the random noise is updated at the end of each episode + """ + # TODO: Multivariate distr?? 
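+        # How the randomness enters (as currently written): update_noise()
+        # draws a single 10-dim noise vector -- N(mean=(low + high) / 2, std=10)
+        # for 'G', Uniform(low, high) for 'U' -- and each *_response_func adds
+        # that same vector to the points effect until update_noise() is called
+        # again.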
+ + super().__init__( + baseline_energy_df, points_multiplier=points_multiplier, response=response + ) + + distr = distr.upper() + assert distr in ["G", "U"] + + self.response = response + self.low = low + self.high = high if high < self.max_demand else 50 + self.distr = distr + + self.noise = [] + self.update_noise() + + def update_noise(self): + if self.distr == "G": + # TODO: Update how to sample from Gausian + self.noise = np.random.normal( + loc=(self.low + self.high) / 2, scale=10, size=10 + ) + + elif self.distr == "U": + self.noise = np.random.uniform(low=self.low, high=self.high, size=10) + + def exponential_response_func(self, points): + points = np.array(points) * self.points_multiplier + points_effect = [p ** 2 for p in points] + + return points_effect + self.noise + + def sin_response_func(self, points): + points = np.array(points) + # n = np.max(points) + # points = [np.sin((float(i)/float(n))*np.pi) for i in points] + points = [np.sin(float(i) * np.pi) * self.points_multiplier for i in points] + points = points + return points + self.noise + + def linear_response_func(self, points): + return points * self.points_multiplier + self.noise + + +# utkarsha's person + + +class CurtailandShiftPerson(Person): + def __init__(self, baseline_energy_df, points_multiplier=1): + super().__init__(baseline_energy_df, points_multiplier) + self.shiftableLoadFraction = 0.1 + self.shiftByHours = 3 + self.curtailableLoadFraction = 0.1 + self.maxCurtailHours = ( + 3 # Person willing to curtail for no more than these hours + ) + + def shiftedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + points = np.array(points) * self.points_multiplier + shiftableLoad = self.shiftableLoadFraction * output + shiftByHours = self.shiftByHours + + # 10 hour day. 
Rearrange the sum of shiftableLoad into these hours by treating points as the 'price' at that hour + # Load can be shifted by a max of shiftByHours (default = 3 hours) + # For each hour, calculate the optimal hour to shift load to within +- 3 hours + shiftedLoad = np.zeros(10) + for hour in range(10): + candidatePrices = points[ + max(hour - shiftByHours, 0) : min(hour + shiftByHours, 9) + 1 + ] + shiftToHour = max(hour - shiftByHours, 0) + np.argmin(candidatePrices) + shiftedLoad[shiftToHour] += shiftableLoad[hour] + return shiftedLoad + + def curtailedLoad(self, points, baseline_day=0, day_of_week=None): + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + points = np.array(points) * self.points_multiplier + curtailableLoad = self.curtailableLoadFraction * output + maxPriceHours = np.argsort(points)[0 : self.maxCurtailHours] + for hour in maxPriceHours: + curtailableLoad[hour] = 0 + return curtailableLoad + + def get_response(self, points, day_of_week=None): + baseline_day = 0 + output = np.array(self.baseline_energy)[ + baseline_day * 24 : baseline_day * 24 + 10 + ] + energy_resp = ( + output * (1 - self.curtailableLoadFraction - self.shiftableLoadFraction) + + self.curtailedLoad(points) + + self.shiftedLoad(points) + ) + + self.min_demand = np.maximum(0, min(energy_resp)) + self.max_demand = np.maximum(0, max(energy_resp)) + + return energy_resp + diff --git a/multiagent/environment.py b/multiagent/environment.py index d2e8d3278..18bc3c996 100644 --- a/multiagent/environment.py +++ b/multiagent/environment.py @@ -7,13 +7,18 @@ # environment for all agents in the multiagent world # currently code assumes that no agents will be created/destroyed at runtime! class MultiAgentEnv(gym.Env): - metadata = { - 'render.modes' : ['human', 'rgb_array'] - } - - def __init__(self, world, reset_callback=None, reward_callback=None, - observation_callback=None, info_callback=None, - done_callback=None, shared_viewer=True): + metadata = {"render.modes": ["human", "rgb_array"]} + + def __init__( + self, + world, + reset_callback=None, + reward_callback=None, + observation_callback=None, + info_callback=None, + done_callback=None, + shared_viewer=True, + ): self.world = world self.agents = self.world.policy_agents @@ -30,9 +35,13 @@ def __init__(self, world, reset_callback=None, reward_callback=None, # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector self.discrete_action_input = False # if true, even the action is continuous, action will be performed discretely - self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False + self.force_discrete_action = ( + world.discrete_action if hasattr(world, "discrete_action") else False + ) # if true, every agent has the same reward - self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False + self.shared_reward = ( + world.collaborative if hasattr(world, "collaborative") else False + ) self.time = 0 # configure spaces @@ -44,21 +53,35 @@ def __init__(self, world, reset_callback=None, reward_callback=None, if self.discrete_action_space: u_action_space = spaces.Discrete(world.dim_p * 2 + 1) else: - u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32) + u_action_space = spaces.Box( + low=-agent.u_range, + high=+agent.u_range, + shape=(world.dim_p,), + dtype=np.float32, + ) if agent.movable: total_action_space.append(u_action_space) # communication action space if 
self.discrete_action_space: c_action_space = spaces.Discrete(world.dim_c) else: - c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32) + c_action_space = spaces.Box( + low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32 + ) if not agent.silent: total_action_space.append(c_action_space) # total action space if len(total_action_space) > 1: # all action spaces are discrete, so simplify to MultiDiscrete action space - if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]): - act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space]) + if all( + [ + isinstance(act_space, spaces.Discrete) + for act_space in total_action_space + ] + ): + act_space = MultiDiscrete( + [[0, act_space.n - 1] for act_space in total_action_space] + ) else: act_space = spaces.Tuple(total_action_space) self.action_space.append(act_space) @@ -66,7 +89,11 @@ def __init__(self, world, reset_callback=None, reward_callback=None, self.action_space.append(total_action_space[0]) # observation space obs_dim = len(observation_callback(agent, self.world)) - self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)) + self.observation_space.append( + spaces.Box( + low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32 + ) + ) agent.action.c = np.zeros(self.world.dim_c) # rendering @@ -81,7 +108,7 @@ def step(self, action_n): obs_n = [] reward_n = [] done_n = [] - info_n = {'n': []} + info_n = {"n": []} self.agents = self.world.policy_agents # set action for each agent for i, agent in enumerate(self.agents): @@ -94,7 +121,7 @@ def step(self, action_n): reward_n.append(self._get_reward(agent)) done_n.append(self._get_done(agent)) - info_n['n'].append(self._get_info(agent)) + info_n["n"].append(self._get_info(agent)) # all agents get total reward in cooperative case reward = np.sum(reward_n) @@ -150,7 +177,7 @@ def _set_action(self, action, agent, action_space, time=None): size = action_space.high - action_space.low + 1 index = 0 for s in size: - act.append(action[index:(index+s)]) + act.append(action[index : (index + s)]) index += s action = act else: @@ -161,10 +188,14 @@ def _set_action(self, action, agent, action_space, time=None): if self.discrete_action_input: agent.action.u = np.zeros(self.world.dim_p) # process discrete action - if action[0] == 1: agent.action.u[0] = -1.0 - if action[0] == 2: agent.action.u[0] = +1.0 - if action[0] == 3: agent.action.u[1] = -1.0 - if action[0] == 4: agent.action.u[1] = +1.0 + if action[0] == 1: + agent.action.u[0] = -1.0 + if action[0] == 2: + agent.action.u[0] = +1.0 + if action[0] == 3: + agent.action.u[1] = -1.0 + if action[0] == 4: + agent.action.u[1] = +1.0 else: if self.force_discrete_action: d = np.argmax(action[0]) @@ -197,40 +228,43 @@ def _reset_render(self): self.render_geoms_xform = None # render environment - def render(self, mode='human'): - if mode == 'human': - alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - message = '' + def render(self, mode="human"): + if mode == "human": + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + message = "" for agent in self.world.agents: comm = [] for other in self.world.agents: - if other is agent: continue + if other is agent: + continue if np.all(other.state.c == 0): - word = '_' + word = "_" else: word = alphabet[np.argmax(other.state.c)] - message += (other.name + ' to ' + agent.name + ': ' + word + ' ') + message += other.name + " to " + agent.name + ": " + word + " " print(message) for i in 
range(len(self.viewers)): # create viewers (if necessary) if self.viewers[i] is None: # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering + # from gym.envs.classic_control import rendering from multiagent import rendering - self.viewers[i] = rendering.Viewer(700,700) + + self.viewers[i] = rendering.Viewer(700, 700) # create rendering geometry if self.render_geoms is None: # import rendering only if we need it (and don't import for headless machines) - #from gym.envs.classic_control import rendering + # from gym.envs.classic_control import rendering from multiagent import rendering + self.render_geoms = [] self.render_geoms_xform = [] for entity in self.world.entities: geom = rendering.make_circle(entity.size) xform = rendering.Transform() - if 'agent' in entity.name: + if "agent" in entity.name: geom.set_color(*entity.color, alpha=0.5) else: geom.set_color(*entity.color) @@ -247,49 +281,52 @@ def render(self, mode='human'): results = [] for i in range(len(self.viewers)): from multiagent import rendering + # update bounds to center around agent cam_range = 1 if self.shared_viewer: pos = np.zeros(self.world.dim_p) else: pos = self.agents[i].state.p_pos - self.viewers[i].set_bounds(pos[0]-cam_range,pos[0]+cam_range,pos[1]-cam_range,pos[1]+cam_range) + self.viewers[i].set_bounds( + pos[0] - cam_range, + pos[0] + cam_range, + pos[1] - cam_range, + pos[1] + cam_range, + ) # update geometry positions for e, entity in enumerate(self.world.entities): self.render_geoms_xform[e].set_translation(*entity.state.p_pos) # render to display or array - results.append(self.viewers[i].render(return_rgb_array = mode=='rgb_array')) + results.append(self.viewers[i].render(return_rgb_array=mode == "rgb_array")) return results # create receptor field locations in local coordinate frame def _make_receptor_locations(self, agent): - receptor_type = 'polar' + receptor_type = "polar" range_min = 0.05 * 2.0 range_max = 1.00 dx = [] # circular receptive field - if receptor_type == 'polar': + if receptor_type == "polar": for angle in np.linspace(-np.pi, +np.pi, 8, endpoint=False): for distance in np.linspace(range_min, range_max, 3): dx.append(distance * np.array([np.cos(angle), np.sin(angle)])) # add origin dx.append(np.array([0.0, 0.0])) # grid receptive field - if receptor_type == 'grid': + if receptor_type == "grid": for x in np.linspace(-range_max, +range_max, 5): for y in np.linspace(-range_max, +range_max, 5): - dx.append(np.array([x,y])) + dx.append(np.array([x, y])) return dx # vectorized wrapper for a batch of multi-agent environments # assumes all environments have the same observation and action space class BatchMultiAgentEnv(gym.Env): - metadata = { - 'runtime.vectorized': True, - 'render.modes' : ['human', 'rgb_array'] - } + metadata = {"runtime.vectorized": True, "render.modes": ["human", "rgb_array"]} def __init__(self, env_batch): self.env_batch = env_batch @@ -310,10 +347,10 @@ def step(self, action_n, time): obs_n = [] reward_n = [] done_n = [] - info_n = {'n': []} + info_n = {"n": []} i = 0 for env in self.env_batch: - obs, reward, done, _ = env.step(action_n[i:(i+env.n)], time) + obs, reward, done, _ = env.step(action_n[i : (i + env.n)], time) i += env.n obs_n += obs # reward = [r / len(self.env_batch) for r in reward] @@ -328,7 +365,7 @@ def reset(self): return obs_n # render environment - def render(self, mode='human', close=True): + def render(self, mode="human", close=True): results_n = [] for env in self.env_batch: 
results_n += env.render(mode, close) diff --git a/multiagent/reward.py b/multiagent/reward.py new file mode 100644 index 000000000..97ee222ad --- /dev/null +++ b/multiagent/reward.py @@ -0,0 +1,203 @@ +# import osqp +import numpy as np +from sklearn.preprocessing import MinMaxScaler + +#### file to calculate the rewards. Meant to be modular: +#### class Rewards should have several different functions by Dec 2019 + + +class Reward: + def __init__(self, energy_use, prices, min_demand, max_demand): + """ + Args: + energy_use: list returned by Person class signifying energy use + prices: list returned by grid signifying cost throughout day + min_demand: value computed by Person class signifying minimum energy use long term + max_demand: value computed by Person class signifying maximum energy use long term + """ + + self.energy_use = np.array(energy_use) + self.prices = np.array(prices) + self._num_timesteps = energy_use.shape[0] + self.min_demand = np.min(energy_use) # min_demand + self.max_demand = np.max(energy_use) # max_demand + self.baseline_max_demand = 159.32 + + assert round(self.max_demand) == round( + max_demand + ), "The max demand that the player is using and the optimization is using is not the same" + assert round(self.min_demand) == round( + min_demand + ), "The min demand that the player is using and the optimization is using is not the same" + + self.total_demand = np.sum(energy_use) + + def ideal_use_calculation(self): + import cvxpy as cvx + + """ + Computes an optimization of demand according to price + + returns: np.array of ideal energy demands given a price signal + """ + + demands = cvx.Variable(self._num_timesteps) + min_demand = cvx.Parameter() + max_demand = cvx.Parameter() + total_demand = cvx.Parameter() + prices = cvx.Parameter(self._num_timesteps) + + min_demand = self.min_demand + max_demand = self.max_demand + total_demand = self.total_demand + + while max_demand * 10 < total_demand: + print("multiplying demand to make optimization work") + print("max_demand: " + str(max_demand)) + print("total_demand: " + str(total_demand)) + print("energy_use") + print(self.energy_use) + max_demand *= 1.1 + + prices = self.prices + constraints = [cvx.sum(demands, axis=0, keepdims=True) == total_demand] + # constraints = [np.ones(self._num_timesteps).T * demands == total_demand] + for i in range(self._num_timesteps): + constraints += [demands[i] <= max_demand] + constraints += [min_demand <= demands[i]] + # if i != 0:y + # constraints += [cvx.abs(demands[i] - demands[i-1]) <= 100] + + objective = cvx.Minimize(demands.T * prices) + problem = cvx.Problem(objective, constraints) + + problem.solve(solver=cvx.ECOS, verbose=False) + return np.array(demands.value) + + def log_cost(self): + """ + Scales energy_use to be between min and max energy demands (this is repeated + in agent.routine_output_trasform), and then returns the simple total cost. + + """ + + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + + return -np.log(np.dot(scaled_energy, self.prices)) + + def log_cost_regularized(self): + """ + Scales energy_use to be between min and max energy demands (this is repeated + in agent.routine_output_trasform), and then returns the simple total cost. + + :param: h - the hyperparameter that modifies the penalty on energy demand that's driven too low. + + """ + + scaler = MinMaxScaler(feature_range=(self.min_demand, self.max_demand)) + ## ? 
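+        # Reading of the return value below: -log(scaled_energy . prices)
+        # rewards cheaper consumption profiles, and the second term subtracts a
+        # flat 10 whenever total daily use drops below 10 * (0.5 *
+        # baseline_max_demand), i.e. ten hours at half the 159.32 kWh baseline
+        # peak, discouraging the degenerate "use nothing" strategy.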
+ scaled_energy = np.squeeze(scaler.fit_transform(self.energy_use.reshape(-1, 1))) + + return -np.log(np.dot(scaled_energy, self.prices)) - 10 * ( + np.sum(self.energy_use) < (10 * (0.5 * self.baseline_max_demand)) + ) # - + # 10 * ()) + # sigmoid between 10 and 20 so there's a smooth transition + # - lambd * (difference b/w energy(t)) - put a bound on + # play with the lipschitz constant + # [10, 20, 10, 10, 10, ] + + def neg_distance_from_ideal(self, demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a numerical distance metric, negated + """ + + return -((demands - self.energy_use) ** 2).sum() + + def cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a cost-based distance metric, negated + """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + return cost_difference + + def log_cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + the log of the cost distance + """ + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # TODO ENSURE THAT COST DIFFERENCE IS < 0 + if cost_difference < 0: + return -np.log(-cost_difference) + else: + print( + "WEIRD REWARD ALERT. IDEAL COST >= CURRENT COST. returning reward of 10" + ) + return 10 + + def scaled_cost_distance(self, ideal_demands): + """ + args: + demands: np.array() of demands from ideal_use_calculation() + + returns: + a cost-based distance metric normalized by total ideal cost + """ + + current_cost = np.dot(self.prices, self.energy_use) + ideal_cost = np.dot(self.prices, ideal_demands) + + cost_difference = ideal_cost - current_cost + + # print("--" * 10) + # print("ideal cost") + # print(ideal_cost) + # print("--" * 10) + # print("prices") + # print(self.prices) + # print("--" * 10) + # print("current_cost") + # print(current_cost) + + if cost_difference > 0 or ideal_cost < 0: + print("--" * 10) + print("Problem with reward") + # print("min_demand: " + str(self.min_demand)) + # print("max_demand: " + str(self.max_demand)) + # print("--" * 10) + print("prices") + print(self.prices) + # print("current_cost") + # print(current_cost) + # print("--" * 10) + # print("ideal_cost") + # print(ideal_cost) + # print("ideal_demands") + # print(ideal_demands) + # print("energy demand") + # print(self.energy_use) + print("taking the neg abs value so that it stays the same sign.") + + return -np.abs(cost_difference / ideal_cost) + diff --git a/multiagent/scenarios/simple_push.py b/multiagent/scenarios/simple_push.py index 907fef5b5..733535860 100644 --- a/multiagent/scenarios/simple_push.py +++ b/multiagent/scenarios/simple_push.py @@ -2,18 +2,19 @@ from multiagent.core import World, Agent, Landmark from multiagent.scenario import BaseScenario + class Scenario(BaseScenario): def make_world(self): world = World() # set any world properties first world.dim_c = 2 - num_agents = 2 + num_agents = 2 # same as dim_c num_adversaries = 1 num_landmarks = 2 # add agents world.agents = [Agent() for i in range(num_agents)] for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i + agent.name = "agent %d" % i agent.collide = True agent.silent = True if i < num_adversaries: @@ -23,7 +24,7 @@ def make_world(self): # add landmarks world.landmarks = 
[Landmark() for i in range(num_landmarks)] for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i + landmark.name = "landmark %d" % i landmark.collide = False landmark.movable = False # make initial conditions @@ -57,7 +58,11 @@ def reset_world(self, world): def reward(self, agent, world): # Agents are rewarded based on minimum agent distance to each landmark - return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) + return ( + self.adversary_reward(agent, world) + if agent.adversary + else self.agent_reward(agent, world) + ) def agent_reward(self, agent, world): # the distance to the goal @@ -65,14 +70,20 @@ def agent_reward(self, agent, world): def adversary_reward(self, agent, world): # keep the nearest good agents away from the goal - agent_dist = [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in world.agents if not a.adversary] + agent_dist = [ + np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) + for a in world.agents + if not a.adversary + ] pos_rew = min(agent_dist) - #nearest_agent = world.good_agents[np.argmin(agent_dist)] - #neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) - neg_rew = np.sqrt(np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos))) - #neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) + # nearest_agent = world.good_agents[np.argmin(agent_dist)] + # neg_rew = np.sqrt(np.sum(np.square(nearest_agent.state.p_pos - agent.state.p_pos))) + neg_rew = np.sqrt( + np.sum(np.square(agent.goal_a.state.p_pos - agent.state.p_pos)) + ) + # neg_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - agent.state.p_pos))) for a in world.good_agents]) return pos_rew - neg_rew - + def observation(self, agent, world): # get positions of all entities in this agent's reference frame entity_pos = [] @@ -86,11 +97,19 @@ def observation(self, agent, world): comm = [] other_pos = [] for other in world.agents: - if other is agent: continue + if other is agent: + continue comm.append(other.state.c) other_pos.append(other.state.p_pos - agent.state.p_pos) if not agent.adversary: - return np.concatenate([agent.state.p_vel] + [agent.goal_a.state.p_pos - agent.state.p_pos] + [agent.color] + entity_pos + entity_color + other_pos) + return np.concatenate( + [agent.state.p_vel] + + [agent.goal_a.state.p_pos - agent.state.p_pos] + + [agent.color] + + entity_pos + + entity_color + + other_pos + ) else: - #other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network + # other_pos = list(reversed(other_pos)) if random.uniform(0,1) > 0.5 else other_pos # randomize position of other agents in adversary network return np.concatenate([agent.state.p_vel] + entity_pos + other_pos) diff --git a/multiagent/utils.py b/multiagent/utils.py new file mode 100644 index 000000000..1cb1d8877 --- /dev/null +++ b/multiagent/utils.py @@ -0,0 +1,116 @@ +import csv +import numpy as np +import pandas as pd +from scipy.optimize import minimize +import os +import matplotlib.pyplot as plt +from sklearn.preprocessing import MinMaxScaler +import os + +# data_path = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "building_data.csv") +# csv_path = os.path.dirname(os.path.realpath(__file__)) + "/building_data.csv" + + +def price_signal(day=45, type_of_DR="real_time_pricing"): + + """ + Utkarsha's work on price signal from a building with 
demand and solar + Input: Day = an int signifying a 24 hour period. 365 total, all of 2012, start at 1. + Output: netdemand_price, a measure of how expensive energy is at each time in the day + optionally, we can return the optimized demand, which is the building + calculating where the net demand should be allocated + """ + csv_path = "building_data.csv" + csv_path_2 = "../gym-socialgame/gym_socialgame/envs/building_data.csv" + current_dir = os.path.dirname(os.path.realpath(__file__)) + csv_path_3 = os.path.join( + current_dir, + "../../transactive_control/gym-socialgame/gym_socialgame/envs/building_data.csv", + ) + try: + df = pd.read_csv(csv_path) + except: + try: + df = pd.read_csv(csv_path_2) + except: + df = pd.read_csv(csv_path_3) + + pv = 0.001 * np.array(df["PV (W)"].tolist()) + price = np.array(df["Price( $ per kWh)"].tolist()) + demand = np.nan_to_num(np.array(df["Office_Elizabeth (kWh)"].tolist())) + demand_charge = 10 / 30 # 10$/kW per month + + pvsize = 0 # Assumption + netdemand = demand - pvsize * pv + + # Data starts at 5 am on Jan 1 + netdemand_24 = netdemand[24 * day - 5 : 24 * day + 19] + price_24 = price[24 * day - 5 : 24 * day + 19] + pv_24 = pv[24 * day - 5 : 24 * day + 19] + demand_24 = demand[24 * day - 5 : 24 * day + 19] + + # Calculate optimal load scheduling. 90% of load is fixed, 10% is controllable. + def optimise_24h(netdemand_24, price_24): + currentcost = netdemand_24 @ price_24 + + fixed_load = 0.9 * netdemand_24 + controllable_load = sum(0.1 * netdemand_24) + + def objective(x): + load = fixed_load + x + cost = np.multiply(price_24, load) + # Negative demand means zero cost, not negative cost + # Adding L1 regularisation to penalise shifting of occupant demand + lambd = 0.005 + # Demand charge: attached to peak demand + return ( + sum(np.maximum(cost, 0)) + + demand_charge * max(load) + + lambd * sum(abs(x - 0.1 * netdemand_24)) + ) + + def constraint_sumofx(x): + return sum(x) - controllable_load + + def constraint_x_positive(x): + return x + + x0 = np.zeros(24) + cons = [ + {"type": "eq", "fun": constraint_sumofx}, + {"type": "ineq", "fun": constraint_x_positive}, + ] + sol = minimize(objective, x0, constraints=cons) + return sol + + if type_of_DR == "real_time_pricing": + sol = optimise_24h(netdemand_24, price_24) + x = sol["x"] + diff = x - 0.1 * netdemand_24 + return -diff - min(-diff) + + elif type_of_DR == "time_of_use": + if np.mean(price_24[8:18]) == price_24[9]: + price_24[13:16] += 3 + return price_24 + else: + return "error!!!" + + +def fourier_points_from_action(action, points_length, fourier_basis_size): + assert ( + fourier_basis_size == (action.size + 1) // 2 + ), "Incorrect fourier basis size for actions" + root_points = (action[0] / 2) * np.ones(points_length) + inp = np.linspace(0, 1, points_length) + for k in range(1, fourier_basis_size): + ak, bk = action[2 * k - 1], action[2 * k] + root_points += ak * np.sin(2 * np.pi * k * inp) + bk * np.cos( + 2 * np.pi * k * inp + ) + + points = root_points ** 2 + # TODO: More elegant solution than clipping + points = np.clip(points, 0, 10) + return points +
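+
+
+# Rough usage sketch for fourier_points_from_action (illustrative numbers, not
+# part of the training code): a "fourier" action of size
+# 2 * fourier_basis_size - 1 is mapped to a non-negative 10-dim points vector.
+#
+#   >>> action = np.array([1.0, 0.5, -0.5, 0.2, 0.1])  # fourier_basis_size = 3
+#   >>> points = fourier_points_from_action(action, points_length=10,
+#   ...                                      fourier_basis_size=3)
+#   >>> points.shape
+#   (10,)
+#   >>> bool(np.all((points >= 0) & (points <= 10)))
+#   True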