From cd234ab4fdc722dba19300b867b5fd15cfd5ad4f Mon Sep 17 00:00:00 2001
From: ymkymkymkymx <1435664939@qq.com>
Date: Mon, 15 Jul 2019 10:51:14 -0400
Subject: [PATCH 01/56] Messing with mountain car

---
 agents_using_gym/gymMountainCarv0/README.md   | 12 ++++
 agents_using_gym/gymMountainCarv0/cheating.py | 13 ++++
 .../gymMountainCarv0/simpleqlearning.py       | 62 +++++++++++++++++++
 3 files changed, 87 insertions(+)
 create mode 100644 agents_using_gym/gymMountainCarv0/README.md
 create mode 100644 agents_using_gym/gymMountainCarv0/cheating.py
 create mode 100644 agents_using_gym/gymMountainCarv0/simpleqlearning.py

diff --git a/agents_using_gym/gymMountainCarv0/README.md b/agents_using_gym/gymMountainCarv0/README.md
new file mode 100644
index 000000000..64aca326a
--- /dev/null
+++ b/agents_using_gym/gymMountainCarv0/README.md
@@ -0,0 +1,12 @@
+## This folder includes some agents for gym's mountain car environment.
+## The code in this folder uses Python 3.6.1, gym==0.13.1, numpy==1.16.4. It uses some functions from gym==0.13.1 which are not implemented in gym==0.10.5, so please upgrade your gym before running these scripts.
+### If you don't know how to upgrade gym:
+```
+pip uninstall gym
+pip install gym
+```
+## Python files
+### These files are just using gym, and can be run by ```python filename.py``` or ```python3 filename.py``` if you are using linux. IDEs shold be able to run them as well.
+### cheating.py is an straight solution by Mark Yu after 2 second of thinking about this game, it represents Mark's superiority against AI. JK.
+### simpleqlearning.py is an implementation of qlearning that Mark learnt from wikipedia [https://en.wikipedia.org/wiki/Q-learning](https://en.wikipedia.org/wiki/Q-learning). Feel free to mess with the learning rate and discountrate and compare the time it takes for the car to reach the summit.
+
diff --git a/agents_using_gym/gymMountainCarv0/cheating.py b/agents_using_gym/gymMountainCarv0/cheating.py
new file mode 100644
index 000000000..c67572d6c
--- /dev/null
+++ b/agents_using_gym/gymMountainCarv0/cheating.py
@@ -0,0 +1,13 @@
+import gym
+env = gym.make("MountainCar-v0")
+
+done=False
+state=env.reset()
+while not done:
+    if state[1]<=0:
+        state, reward, done,info = env.step(0)
+    else:
+        state, reward, done,info = env.step(2)
+    env.render()
+
+env.close()
\ No newline at end of file
diff --git a/agents_using_gym/gymMountainCarv0/simpleqlearning.py b/agents_using_gym/gymMountainCarv0/simpleqlearning.py
new file mode 100644
index 000000000..f955d6e5c
--- /dev/null
+++ b/agents_using_gym/gymMountainCarv0/simpleqlearning.py
@@ -0,0 +1,62 @@
+import gym
+import numpy
+
+env = gym.make("MountainCar-v0")
+
+learningrate = 0.7
+discount = 0.90
+#initialize the Q table [40,40,3] with random values. Each entry of the Q table is the Q value of a [position state, velocity state, action you take] triple.
+#Note that the game is continuous but the states of our Q table are discrete (since we can only deal with finitely many states), so I also need a getstate function to turn the continuous states into discrete states.
+#All Q values are initialized between -2 and 0 because the reward is always -1 in the mountain car game.
+q_table = numpy.random.uniform(-2, 0, [40,40,3]) + + +def getstate(state): + discrete_state = (state - env.observation_space.low)/((env.observation_space.high-env.observation_space.low)/[40,40]) + return tuple(discrete_state.astype(numpy.int)) # we use this tuple to look up the 3 Q values for the available actions in the q-table + + +for episode in range(2700): + currentstate = getstate(env.reset()) + done = False + #render every 300 episodes to save time. + if episode % 300 == 0: + render = True + print(episode) + else: + render = False + + while not done: + action = numpy.argmax(q_table[currentstate]) + new_state, reward, done,info = env.step(action) + #nextstate is the discrete mapping from the new state to the q table + nextstate = getstate(new_state) + + if render: + env.render() + + # Update Q table + if not done: + # Maximum possible Q value in next step (for new state) + maxnextq = numpy.max(q_table[nextstate]) + # Current Q value (for current state and performed action) + current_q = q_table[currentstate + (action,)] + # the qlearning function + new_q = (1 - learningrate) * current_q + learningrate * (reward + discount * maxnextq) + # Update Q table with new Q value + q_table[currentstate + (action,)] = new_q + + + # Simulation ended (for any reson) - if goal position is achived - update Q value with reward directly + elif new_state[0] >= env.goal_position: + print("We make it!") + print(episode) + q_table[currentstate + (action,)] = 0 + + + currentstate = nextstate + + + + +env.close() \ No newline at end of file From 3703822df03cff68dcd5c6ec48ba1e17eb955d29 Mon Sep 17 00:00:00 2001 From: ymkymkymkymx <43044797+ymkymkymkymx@users.noreply.github.com> Date: Mon, 15 Jul 2019 10:53:33 -0400 Subject: [PATCH 02/56] Update README.md --- agents_using_gym/gymMountainCarv0/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents_using_gym/gymMountainCarv0/README.md b/agents_using_gym/gymMountainCarv0/README.md index 64aca326a..a36f90fd1 100644 --- a/agents_using_gym/gymMountainCarv0/README.md +++ b/agents_using_gym/gymMountainCarv0/README.md @@ -6,7 +6,7 @@ pip uninstall gym pip install gym ``` ## Python files -### These files are just using gym, and can be run by ```python filename.py``` or ```python3 filename.py``` if you are using linux. IDEs shold be able to run them as well. +### These files are just using gym, and can be run by ```python filename.py``` (or ```python3 filename.py``` if you are using linux.) IDEs shold be able to run them as well. ### cheating.py is an straight solution by Mark Yu after 2 second of thinking about this game, it represents Mark's superiority against AI. JK. ### simpleqlearning.py is an implementation of qlearning that Mark learnt from wikipedia [https://en.wikipedia.org/wiki/Q-learning](https://en.wikipedia.org/wiki/Q-learning). Feel free to mess with the learning rate and discountrate and compare the time it takes for the car to reach the summit. 
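The README above suggests varying the learning rate and the discount and comparing how long the car takes to reach the summit. Below is a minimal sketch of that comparison, reusing the discretization and update rule from simpleqlearning.py. It assumes gym==0.13.1 and numpy as stated in the README; the bin count, episode budget, and the `first_success_episode` helper name are illustrative choices, not taken from the patches.

```python
import gym
import numpy as np

def first_success_episode(learning_rate, discount, episodes=2000, bins=40):
    """Train tabular Q-learning on MountainCar-v0 and return the first episode
    that reaches the flag (position >= 0.5), or None if it never does."""
    env = gym.make("MountainCar-v0")
    q_table = np.random.uniform(-2, 0, [bins, bins, 3])
    span = env.observation_space.high - env.observation_space.low

    def get_state(obs):
        # map the continuous (position, velocity) pair to discrete bin indices
        return tuple(((obs - env.observation_space.low) / (span / bins)).astype(int))

    for episode in range(episodes):
        state = get_state(env.reset())
        done = False
        while not done:
            action = np.argmax(q_table[state])        # greedy action from the table
            obs, reward, done, _ = env.step(action)
            new_state = get_state(obs)
            if not done:
                # same update as simpleqlearning.py, written in incremental form
                target = reward + discount * np.max(q_table[new_state])
                q_table[state + (action,)] += learning_rate * (target - q_table[state + (action,)])
            elif obs[0] >= 0.5:                       # reached the summit
                q_table[state + (action,)] = 0
                env.close()
                return episode
            state = new_state
    env.close()
    return None

for lr in (0.3, 0.5, 0.7):
    print("learning rate", lr, "-> first success at episode", first_success_episode(lr, 0.90))
```

The same loop can be reused to sweep the discount instead of the learning rate.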
From 6a5113c50582db60b02d685352a79138a588f0ba Mon Sep 17 00:00:00 2001 From: ymkymkymkymx <43044797+ymkymkymkymx@users.noreply.github.com> Date: Mon, 15 Jul 2019 10:56:15 -0400 Subject: [PATCH 03/56] Update README.md --- agents_using_gym/gymMountainCarv0/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agents_using_gym/gymMountainCarv0/README.md b/agents_using_gym/gymMountainCarv0/README.md index a36f90fd1..8a2475783 100644 --- a/agents_using_gym/gymMountainCarv0/README.md +++ b/agents_using_gym/gymMountainCarv0/README.md @@ -7,6 +7,6 @@ pip install gym ``` ## Python files ### These files are just using gym, and can be run by ```python filename.py``` (or ```python3 filename.py``` if you are using linux.) IDEs shold be able to run them as well. -### cheating.py is an straight solution by Mark Yu after 2 second of thinking about this game, it represents Mark's superiority against AI. JK. -### simpleqlearning.py is an implementation of qlearning that Mark learnt from wikipedia [https://en.wikipedia.org/wiki/Q-learning](https://en.wikipedia.org/wiki/Q-learning). Feel free to mess with the learning rate and discountrate and compare the time it takes for the car to reach the summit. +### cheating.py is a straight solution by Mark Yu after 2 seconds of thinking, it represents Mark's superiority against AI. JK. +### simpleqlearning.py is an implementation of qlearning, an algorithm that Mark learnt from wikipedia [https://en.wikipedia.org/wiki/Q-learning](https://en.wikipedia.org/wiki/Q-learning). Feel free to mess with the learning rate and discountrate in the code and compare the time it takes for the AI to learn how to push the car to the summit. From ba131a8888cdc61762f6f6123bb5650498751b5f Mon Sep 17 00:00:00 2001 From: Devak Patel Date: Wed, 17 Jul 2019 10:10:18 -0400 Subject: [PATCH 04/56] Create EnvironmentIdeas.md --- EnvironmentIdeas.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 EnvironmentIdeas.md diff --git a/EnvironmentIdeas.md b/EnvironmentIdeas.md new file mode 100644 index 000000000..c91c25734 --- /dev/null +++ b/EnvironmentIdeas.md @@ -0,0 +1 @@ +#Idea 1: From 367a1c60c233cb6984b5c72e63bec92593d396d4 Mon Sep 17 00:00:00 2001 From: Devak Patel Date: Wed, 17 Jul 2019 10:14:29 -0400 Subject: [PATCH 05/56] Update and rename EnvironmentIdeas.md to ScenarioIdeas.md --- EnvironmentIdeas.md | 1 - ScenarioIdeas.md | 11 +++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) delete mode 100644 EnvironmentIdeas.md create mode 100644 ScenarioIdeas.md diff --git a/EnvironmentIdeas.md b/EnvironmentIdeas.md deleted file mode 100644 index c91c25734..000000000 --- a/EnvironmentIdeas.md +++ /dev/null @@ -1 +0,0 @@ -#Idea 1: diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md new file mode 100644 index 000000000..e32eb935e --- /dev/null +++ b/ScenarioIdeas.md @@ -0,0 +1,11 @@ +# Idea table: + +Generated at: https://www.tablesgenerator.com/markdown_tables# \ + +| | Possible Actions | Rewards per Outcome | Properties of other entities | Nash Equilibrium | Other Notes | +|---- |------------------ |--------------------- |------------------------------ |------------------ |------------- | +| #1 | | | | | | +| #2 | | | | | | +| #3 | | | | | | +| #4 | | | | | | +| #5 | | | | | | From 593f2cb285cde6bf5009853dd589d3e7543cc4a9 Mon Sep 17 00:00:00 2001 From: Devak Patel Date: Wed, 17 Jul 2019 10:44:07 -0400 Subject: [PATCH 06/56] Added Scenario Idea 1 --- ScenarioIdeas.md | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff 
--git a/ScenarioIdeas.md b/ScenarioIdeas.md index e32eb935e..b2fa57b3d 100644 --- a/ScenarioIdeas.md +++ b/ScenarioIdeas.md @@ -1,11 +1,24 @@ # Idea table: -Generated at: https://www.tablesgenerator.com/markdown_tables# \ - -| | Possible Actions | Rewards per Outcome | Properties of other entities | Nash Equilibrium | Other Notes | -|---- |------------------ |--------------------- |------------------------------ |------------------ |------------- | -| #1 | | | | | | -| #2 | | | | | | -| #3 | | | | | | -| #4 | | | | | | -| #5 | | | | | | +Generated at: https://www.tablesgenerator.com/markdown_tables + +| | Possible Actions | Rewards per Outcome | Properties of other entities | Nash Equilibrium | Other Notes | +|----|-----------------------|--------------------------------------------------------------------------------------------------------------------------|-------------------------------------|------------------|-------------| +| #1 | Expand, attack, trade | Expanding + attacking spends resources for greater resource bonuses later. Trading gives bonus resources for both agents | No other entities other than agents | Attack | | +| #2 | | | | | | +| #3 | | | | | | +| #4 | | | | | | +| #5 | | | | | | + +# Details: +## Idea 1. (Risk but on a grid) +Grid based cell game, each agent starts with 1 cell on some part of the grid. Agents use resources to expand, attack, or trade with neighboring cells. Every turn agents gain a set amount of resources based on area of agent's cells. For every neighboring cell, if it is not occupied, the agent can choose to spend resources to expand into the area, or not. If the cell is occupied, the agent can choose to attack, or trade. Attacking allows for the takeover of the cell and requires the agent to spend resources. Trading requires the agent to give resources to the other agent, but if both agents decide to trade, they can recieve some bonus based on who gave more resources. If one agent attacks, and the other trades, the attacker automatically wins. If both attack, the agent that spent more resources to attack wins. Resource costs and bonuses can be tweaked to ensure fairness and balance. +### Examples: +Agent A and Agent B are neighbors: if A trades 2 resources, and B trades 4 resources, A could gain 4(from B) + 2(bonus includes how much given) + 1(some multiplier of how much was given in this case 0.5 for giving less) resulting in net +5, B would gain 2(from A) + 4(given) + 4(multiplier bonus of 1 for giving more) resulting in +6 \ +If A attacks B, spending 5 resources; B attempts to trade 4 resources, A takes over some area of B and gains 4 resources from B's trade with net gain of -1 resource and + some area; B has a net gain of -4 resources and -some area. \ +If A attacks B, spending 5 resources; B attacks A spending 6 resources, B takes some area of A. A has a net gain of -5 resources and -some area; B has a net gain of -5 resources and -some area. + +### Possible expansion: +Add defend action, which blocks attack, but opponent agent gains bigger bonus resource if they try to trade. 
+ + From c6ac044e4ab3d550c4a73e0ba9c3de2e701c2c20 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 17 Jul 2019 13:10:19 -0400 Subject: [PATCH 07/56] start finding incompatibilities for latest gym version --- .idea/misc.xml | 4 ++++ .idea/modules.xml | 8 ++++++++ .idea/multiagent-particle-envs.iml | 8 ++++++++ .idea/vcs.xml | 6 ++++++ changes.txt | 9 +++++++++ 5 files changed, 35 insertions(+) create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/multiagent-particle-envs.iml create mode 100644 .idea/vcs.xml create mode 100644 changes.txt diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 000000000..65531ca99 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..20414b498 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/multiagent-particle-envs.iml b/.idea/multiagent-particle-envs.iml new file mode 100644 index 000000000..d0876a78d --- /dev/null +++ b/.idea/multiagent-particle-envs.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 000000000..94a25f7f4 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/changes.txt b/changes.txt new file mode 100644 index 000000000..560f4a9cd --- /dev/null +++ b/changes.txt @@ -0,0 +1,9 @@ +multi_discrete.py: +- Changed random_array assignment in sample() to use gym.utils.seeding +rendering.py: +- The reraise function appears to no longer exist. Fortunately, all this does is add additional information to a raised + exception, so should be simple to reimplement (or ignore) +environment.py: +- line 234: geom.set_color(*entity.color, alpha=0.5) - receives multiple arguments. This is a pain, because it is + expanding the color argument, a 3-tuple or 4-tuple, but we want to set alpha to 0.5. A dumb fix is to make a new + tuple with the first three arguments of the color, and 0.5 for alpha. 
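On the reraise note in changes.txt above: since the helper only adds context to an exception that is already being raised, one option is to reimplement it locally rather than ignore it. The sketch below is ours, not gym's API (the later patches simply inline the hint message instead):

```python
import sys

def reraise(prefix="", suffix=""):
    """Re-raise the current exception with extra context added to its message."""
    exc_type, exc_value, _ = sys.exc_info()
    raise exc_type(f"{prefix}{exc_value} {suffix}") from exc_value

# usage, mirroring the original rendering.py pattern:
try:
    import pyglet
except ImportError:
    reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'.")
```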
From 113c199da5ddee2c9287f9f88576c4494e3aff67 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 17 Jul 2019 13:17:44 -0400 Subject: [PATCH 08/56] whoops remove editor files --- .gitignore | 3 ++- .idea/misc.xml | 4 ---- .idea/modules.xml | 8 -------- .idea/multiagent-particle-envs.iml | 8 -------- .idea/vcs.xml | 6 ------ 5 files changed, 2 insertions(+), 27 deletions(-) delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/multiagent-particle-envs.iml delete mode 100644 .idea/vcs.xml diff --git a/.gitignore b/.gitignore index 4681f8b57..2a0bf53ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ *.egg-info/ -*.pyc \ No newline at end of file +*.pyc +.idea/ diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 65531ca99..000000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 20414b498..000000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/multiagent-particle-envs.iml b/.idea/multiagent-particle-envs.iml deleted file mode 100644 index d0876a78d..000000000 --- a/.idea/multiagent-particle-envs.iml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7f4..000000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 38490a2407f8d35f7d0a1aae20559be8fc9608a5 Mon Sep 17 00:00:00 2001 From: ymkymkymkymx <1435664939@qq.com> Date: Wed, 17 Jul 2019 13:24:03 -0400 Subject: [PATCH 09/56] downgrade --- agents_using_gym/gymMountainCarv0/simpleqlearning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents_using_gym/gymMountainCarv0/simpleqlearning.py b/agents_using_gym/gymMountainCarv0/simpleqlearning.py index f955d6e5c..3e8ccd7d1 100644 --- a/agents_using_gym/gymMountainCarv0/simpleqlearning.py +++ b/agents_using_gym/gymMountainCarv0/simpleqlearning.py @@ -48,7 +48,7 @@ def getstate(state): # Simulation ended (for any reson) - if goal position is achived - update Q value with reward directly - elif new_state[0] >= env.goal_position: + elif new_state[0] >= 0.5: print("We make it!") print(episode) q_table[currentstate + (action,)] = 0 From 2ef3400085abd67d4d410213be88ce329a4e5629 Mon Sep 17 00:00:00 2001 From: dpakalarry Date: Wed, 17 Jul 2019 13:33:22 -0400 Subject: [PATCH 10/56] Added testing.py to play around with a scenario --- multiagent/scenarios/testing.py | 139 ++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 multiagent/scenarios/testing.py diff --git a/multiagent/scenarios/testing.py b/multiagent/scenarios/testing.py new file mode 100644 index 000000000..9bbf95de4 --- /dev/null +++ b/multiagent/scenarios/testing.py @@ -0,0 +1,139 @@ +import numpy as np +from multiagent.core import World, Agent, Landmark +from multiagent.scenario import BaseScenario + + +class Scenario(BaseScenario): + + def make_world(self): + world = World() + # set any world properties first + world.dim_c = 2 + num_agents = 5 + world.num_agents = num_agents + num_adversaries = 0 + num_landmarks = num_agents - 1 + # add agents + world.agents = [Agent() for i in range(num_agents)] + for i, agent in enumerate(world.agents): + agent.name = 'agent %d' % i + agent.collide = False + agent.silent = True + agent.adversary = True if i < num_adversaries else False + 
agent.size = 0.15 + # add landmarks + world.landmarks = [Landmark() for i in range(num_landmarks)] + for i, landmark in enumerate(world.landmarks): + landmark.name = 'landmark %d' % i + landmark.collide = False + landmark.movable = False + landmark.size = 0.08 + # make initial conditions + self.reset_world(world) + return world + + def reset_world(self, world): + # random properties for agents + world.agents[0].color = np.array([0.85, 0.35, 0.35]) + for i in range(1, world.num_agents): + world.agents[i].color = np.array([0.35, 0.35, 0.85]) + # random properties for landmarks + for i, landmark in enumerate(world.landmarks): + landmark.color = np.array([0.15, 0.15, 0.15]) + # set goal landmark + goal = np.random.choice(world.landmarks) + goal.color = np.array([0.15, 0.65, 0.15]) + for agent in world.agents: + agent.goal_a = goal + # set random initial states + for agent in world.agents: + agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) + agent.state.p_vel = np.zeros(world.dim_p) + agent.state.c = np.zeros(world.dim_c) + for i, landmark in enumerate(world.landmarks): + landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) + landmark.state.p_vel = np.zeros(world.dim_p) + + def benchmark_data(self, agent, world): + # returns data for benchmarking purposes + if agent.adversary: + return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) + else: + dists = [] + for l in world.landmarks: + dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos))) + dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) + return tuple(dists) + + # return all agents that are not adversaries + def good_agents(self, world): + return [agent for agent in world.agents if not agent.adversary] + + # return all adversarial agents + def adversaries(self, world): + return [agent for agent in world.agents if agent.adversary] + + def reward(self, agent, world): + # Agents are rewarded based on minimum agent distance to each landmark + return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) + + def agent_reward(self, agent, world): + # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it + shaped_reward = True + shaped_adv_reward = True + + # Calculate negative reward for adversary + adversary_agents = self.adversaries(world) + if shaped_adv_reward: # distance-based adversary reward + adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents]) + else: # proximity-based adversary reward (binary) + adv_rew = 0 + for a in adversary_agents: + if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size: + adv_rew -= 5 + + # Calculate positive reward for agents + good_agents = self.good_agents(world) + if shaped_reward: # distance-based agent reward + pos_rew = -min( + [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) + else: # proximity-based agent reward (binary) + pos_rew = 0 + if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \ + < 2 * agent.goal_a.size: + pos_rew += 5 + pos_rew -= min( + [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) + return pos_rew + adv_rew + + def adversary_reward(self, agent, world): + # Rewarded based on proximity to the goal landmark + shaped_reward = True + if shaped_reward: # distance-based reward + return -np.sum(np.square(agent.state.p_pos - 
agent.goal_a.state.p_pos)) + else: # proximity-based reward (binary) + adv_rew = 0 + if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size: + adv_rew += 5 + return adv_rew + + + def observation(self, agent, world): + # get positions of all entities in this agent's reference frame + entity_pos = [] + for entity in world.landmarks: + entity_pos.append(entity.state.p_pos - agent.state.p_pos) + # entity colors + entity_color = [] + for entity in world.landmarks: + entity_color.append(entity.color) + # communication of all other agents + other_pos = [] + for other in world.agents: + if other is agent: continue + other_pos.append(other.state.p_pos - agent.state.p_pos) + + if not agent.adversary: + return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos) + else: + return np.concatenate(entity_pos + other_pos) From b68236cf1a37f232e988c493a5126f20a3a5d7b8 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 17 Jul 2019 13:39:52 -0400 Subject: [PATCH 11/56] weird simple_crypto dimension mismatch --- changes.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/changes.txt b/changes.txt index 560f4a9cd..af7cbe1b5 100644 --- a/changes.txt +++ b/changes.txt @@ -4,6 +4,11 @@ rendering.py: - The reraise function appears to no longer exist. Fortunately, all this does is add additional information to a raised exception, so should be simple to reimplement (or ignore) environment.py: -- line 234: geom.set_color(*entity.color, alpha=0.5) - receives multiple arguments. This is a pain, because it is +- line 234: `geom.set_color(*entity.color, alpha=0.5)` - receives multiple arguments. This is a pain, because it is expanding the color argument, a 3-tuple or 4-tuple, but we want to set alpha to 0.5. A dumb fix is to make a new tuple with the first three arguments of the color, and 0.5 for alpha. + +simple_crypto.py: +- line 121: array in conditional can potentially be a boolean rather than an ndarray so it will have no .all() method +- line 122: the sizes in the expression `agent.state.c - agent.goal_a.color` are mismatched and it is unclear where they + come from \ No newline at end of file From d7e489a62a672dd4deae0a87ca5b36289e09547c Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 17 Jul 2019 13:43:21 -0400 Subject: [PATCH 12/56] switch from prng to gym.utils.seeding --- changes.txt | 4 +++- multiagent/multi_discrete.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/changes.txt b/changes.txt index af7cbe1b5..04dc7d817 100644 --- a/changes.txt +++ b/changes.txt @@ -1,8 +1,10 @@ multi_discrete.py: -- Changed random_array assignment in sample() to use gym.utils.seeding +- [FIXED] Changed random_array assignment in sample() to use gym.utils.seeding + rendering.py: - The reraise function appears to no longer exist. Fortunately, all this does is add additional information to a raised exception, so should be simple to reimplement (or ignore) + environment.py: - line 234: `geom.set_color(*entity.color, alpha=0.5)` - receives multiple arguments. This is a pain, because it is expanding the color argument, a 3-tuple or 4-tuple, but we want to set alpha to 0.5. 
A dumb fix is to make a new diff --git a/multiagent/multi_discrete.py b/multiagent/multi_discrete.py index d7108ad43..041484729 100644 --- a/multiagent/multi_discrete.py +++ b/multiagent/multi_discrete.py @@ -4,7 +4,8 @@ import numpy as np import gym -from gym.spaces import prng +from gym.utils import seeding + class MultiDiscrete(gym.Space): """ @@ -27,10 +28,12 @@ def __init__(self, array_of_param_array): self.high = np.array([x[1] for x in array_of_param_array]) self.num_discrete_space = self.low.shape[0] + self.random = seeding.np_random() + def sample(self): """ Returns a array with one sample from each discrete action space """ # For each row: round(random .* (max - min) + min, 0) - random_array = prng.np_random.rand(self.num_discrete_space) + random_array = self.random.rand(self.num_discrete_space) return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)] def contains(self, x): return len(x) == self.num_discrete_space and (np.array(x) >= self.low).all() and (np.array(x) <= self.high).all() From 2db0588d0d182ca8702a83672c117692cfea6201 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 17 Jul 2019 13:47:31 -0400 Subject: [PATCH 13/56] fix environment argument splatting error with color --- changes.txt | 2 +- multiagent/environment.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/changes.txt b/changes.txt index 04dc7d817..d4eea280c 100644 --- a/changes.txt +++ b/changes.txt @@ -6,7 +6,7 @@ rendering.py: exception, so should be simple to reimplement (or ignore) environment.py: -- line 234: `geom.set_color(*entity.color, alpha=0.5)` - receives multiple arguments. This is a pain, because it is +- [FIXED] line 234: `geom.set_color(*entity.color, alpha=0.5)` - receives multiple arguments. This is a pain, because it is expanding the color argument, a 3-tuple or 4-tuple, but we want to set alpha to 0.5. A dumb fix is to make a new tuple with the first three arguments of the color, and 0.5 for alpha. diff --git a/multiagent/environment.py b/multiagent/environment.py index d2e8d3278..69efccde1 100644 --- a/multiagent/environment.py +++ b/multiagent/environment.py @@ -231,7 +231,8 @@ def render(self, mode='human'): geom = rendering.make_circle(entity.size) xform = rendering.Transform() if 'agent' in entity.name: - geom.set_color(*entity.color, alpha=0.5) + color = (entity.color[0], entity.color[1], entity.color[2], 0.5) + geom.set_color(*color) else: geom.set_color(*entity.color) geom.add_attr(xform) From c7dfadf866da6277c35fbdcfcfce6fa7829f7ec9 Mon Sep 17 00:00:00 2001 From: dpakalarry Date: Wed, 17 Jul 2019 13:54:02 -0400 Subject: [PATCH 14/56] Fixed reraise error --- multiagent/rendering.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/multiagent/rendering.py b/multiagent/rendering.py index cd00c7fb8..3962d8a04 100644 --- a/multiagent/rendering.py +++ b/multiagent/rendering.py @@ -17,13 +17,24 @@ try: import pyglet except ImportError as e: - reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.") + #reraise(suffix="HINT: you can install pyglet directly via 'pip install pyglet'. But if you really just want to install all Gym dependencies and not have to think about it, 'pip install -e .[all]' or 'pip install gym[all]' will do it.") + raise ImportError(''' + Cannot import pyglet. 
+ HINT: you can install pyglet directly via 'pip install pyglet'. + But if you really just want to install all Gym dependencies and not have to think about it, + 'pip install -e .[all]' or 'pip install gym[all]' will do it. + ''') try: from pyglet.gl import * except ImportError as e: - reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python '") - + #reraise(prefix="Error occured while running `from pyglet.gl import *`",suffix="HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. If you're running on a server, you may need a virtual frame buffer; something like this should work: 'xvfb-run -s \"-screen 0 1400x900x24\" python '") + raise ImportError(''' + Error occured while running `from pyglet.gl import *` + HINT: make sure you have OpenGL install. On Ubuntu, you can run 'apt-get install python-opengl'. + If you're running on a server, you may need a virtual frame buffer; something like this should work: + 'xvfb-run -s \"-screen 0 1400x900x24\" python ' + ''') import math import numpy as np From 0732a28ec4a3917fd1d84ed9a1bfa73c2fd6e2f8 Mon Sep 17 00:00:00 2001 From: dpakalarry Date: Wed, 17 Jul 2019 13:54:33 -0400 Subject: [PATCH 15/56] Update changes.txt --- changes.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changes.txt b/changes.txt index d4eea280c..5e1d094fa 100644 --- a/changes.txt +++ b/changes.txt @@ -2,7 +2,7 @@ multi_discrete.py: - [FIXED] Changed random_array assignment in sample() to use gym.utils.seeding rendering.py: -- The reraise function appears to no longer exist. Fortunately, all this does is add additional information to a raised +- [FIXED] The reraise function appears to no longer exist. Fortunately, all this does is add additional information to a raised exception, so should be simple to reimplement (or ignore) environment.py: From dddd55989b4a724537e95017707e12c74a878e8d Mon Sep 17 00:00:00 2001 From: jarbus Date: Wed, 17 Jul 2019 15:15:28 -0400 Subject: [PATCH 16/56] adding documentation --- documentation.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 documentation.md diff --git a/documentation.md b/documentation.md new file mode 100644 index 000000000..b4db86409 --- /dev/null +++ b/documentation.md @@ -0,0 +1,48 @@ +# Environment + +- `make_env.py`: contains code for importing a multiagent environment as an OpenAI Gym-like object. + +- `./multiagent/environment.py`: contains code for environment simulation (interaction physics, `_step()` function, etc.) + +# Policy + +A policy seems to be a system to control an agent. The interactive policy allows control of an agent with keyboard and mouse, but if we wish to implement algorithms we will most likely be implementing them as a policy. + +- `./multiagent/policy.py`: contains code for interactive policy based on keyboard input. + + +# Scenarios + +- `./multiagent/scenario.py`: contains base scenario object that is extended for all scenarios. + +- `./multiagent/scenarios/`: folder where various scenarios/ environments are stored. 
scenario code consists of several functions: + 1) `make_world()`: creates all of the entities that inhabit the world (landmarks, agents, etc.), assigns their capabilities (whether they can communicate, or move, or both). + called once at the beginning of each training session + 2) `reset_world()`: resets the world by assigning properties (position, color, etc.) to all entities in the world + called before every episode (including after make_world() before the first episode) + 3) `reward()`: defines the reward function for a given agent + 4) `observation()`: defines the observation space of a given agent + 5) (optional) `benchmark_data()`: provides diagnostic data for policies trained on the environment (e.g. evaluation metrics) + +You can create new scenarios by implementing the first 4 functions above (`make_world()`, `reset_world()`, `reward()`, and `observation()`). + +# Miscellaneous + +- `./multiagent/core.py`: contains classes for various objects (Entities, Landmarks, Agents, etc.) that are used throughout the code. + +- `./multiagent/rendering.py`: used for displaying agent behaviors on the screen. + +# Execution: + +1. bin/script.py loads - acts as main script +2. Loads scenario + - Uses scenario to generate world +3. Loads mutli-agent enviroment given scenario settings and world +4. Renders environment (initial render) +5. Assigns policies (algorithms) for each agent + - stored as policies[] list +6. Resets environment +7. Infinite while loop + 1. Makes a list of actions, one action per policy + 2. Performs one environment step using entire action list + 3. Re-render From eb09d38689cb0dd360ba7330183a05a2a41b4181 Mon Sep 17 00:00:00 2001 From: jarbus Date: Wed, 17 Jul 2019 19:54:27 -0400 Subject: [PATCH 17/56] More documentation --- documentation.md | 68 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/documentation.md b/documentation.md index b4db86409..4cb09cec2 100644 --- a/documentation.md +++ b/documentation.md @@ -1,17 +1,55 @@ -# Environment +# Execution: + +In a simulation with `n` agents: + +1. bin/script.py loads - acts as main script +2. Loads scenario + - `./multiagent/scenarios/scenario.py.make_world()` +3. Loads multi-agent enviroment given scenario settings and world + - `./multiagent/environment.py.MultiAgentEnv(Scenario.world())` +4. Renders environment (initial render) + - `./multiagent/environment.py.render()` +5. Assigns policies (algorithms) for each agent + - stored as policies[] list + - policy[agent_index] = ./multiagent/policies/template.py.TemplatePolicy(env,agent_index) + - Note: Template not implemented yet, see `./multiagent/policy.py.InteractivePolicy()` for now + - For more information, see [Policies](#POLICIES) +6. Resets environment +7. Infinite while loop + 1. Makes a list of actions, one action per policy + - actions[i] + 2. Performs one environment step using entire action list + - `multiagent/environment.py.step()` returns: + - n observations + - n rewards + - n done states + - n debug objects + 3. Re-render + - `multiagent/environment.py.render()` + +## Environment + +The main class in use during execution. The environment interacts with the scenario and the agents. There is one environment that all scenarios use. Each scenario implements reward() and observation() which the environment calls. -- `make_env.py`: contains code for importing a multiagent environment as an OpenAI Gym-like object. 
+- `./make_env.py`: contains code for importing a multiagent environment as an OpenAI Gym-like object. - `./multiagent/environment.py`: contains code for environment simulation (interaction physics, `_step()` function, etc.) -# Policy +## Policy -A policy seems to be a system to control an agent. The interactive policy allows control of an agent with keyboard and mouse, but if we wish to implement algorithms we will most likely be implementing them as a policy. +A policy seems to be a system to control an agent. The interactive policy allows control of an agent with keyboard and mouse, but if we wish to implement algorithms we will most likely be implementing them as a policy. **NOTE: Policies are enumerable** - `./multiagent/policy.py`: contains code for interactive policy based on keyboard input. +A Policy has two functions: -# Scenarios +- `__init__()` passes the environment to the policy class +- `action(obs)` performs an action given an observation + + +## Scenarios + +A BaseScenario `multiagent/scenario.py` incorporates at least `make_world()` and `reset_world()`. An implemented Scenario will incorporate reward() and observation(). All scenario calls are made through the environment. - `./multiagent/scenario.py`: contains base scenario object that is extended for all scenarios. @@ -20,29 +58,15 @@ A policy seems to be a system to control an agent. The interactive policy allows called once at the beginning of each training session 2) `reset_world()`: resets the world by assigning properties (position, color, etc.) to all entities in the world called before every episode (including after make_world() before the first episode) - 3) `reward()`: defines the reward function for a given agent - 4) `observation()`: defines the observation space of a given agent + 3) `reward(agent,world)`: defines the reward function for a given agent + 4) `observation(agent, world)`: defines the observation space of a given agent 5) (optional) `benchmark_data()`: provides diagnostic data for policies trained on the environment (e.g. evaluation metrics) You can create new scenarios by implementing the first 4 functions above (`make_world()`, `reset_world()`, `reward()`, and `observation()`). -# Miscellaneous +## Miscellaneous - `./multiagent/core.py`: contains classes for various objects (Entities, Landmarks, Agents, etc.) that are used throughout the code. - `./multiagent/rendering.py`: used for displaying agent behaviors on the screen. -# Execution: - -1. bin/script.py loads - acts as main script -2. Loads scenario - - Uses scenario to generate world -3. Loads mutli-agent enviroment given scenario settings and world -4. Renders environment (initial render) -5. Assigns policies (algorithms) for each agent - - stored as policies[] list -6. Resets environment -7. Infinite while loop - 1. Makes a list of actions, one action per policy - 2. Performs one environment step using entire action list - 3. 
Re-render From 990ca856be8873d6aef025e5713e4aa64b011612 Mon Sep 17 00:00:00 2001 From: dpakalarry Date: Fri, 19 Jul 2019 21:09:24 -0400 Subject: [PATCH 18/56] Added some comments to testing.py to better understand --- multiagent/scenarios/testing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/multiagent/scenarios/testing.py b/multiagent/scenarios/testing.py index 9bbf95de4..9a46a9817 100644 --- a/multiagent/scenarios/testing.py +++ b/multiagent/scenarios/testing.py @@ -6,7 +6,7 @@ class Scenario(BaseScenario): def make_world(self): - world = World() + world = World() #World has agents and landmarks # set any world properties first world.dim_c = 2 num_agents = 5 @@ -104,8 +104,9 @@ def agent_reward(self, agent, world): pos_rew += 5 pos_rew -= min( [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) - return pos_rew + adv_rew + return pos_rew + adv_rew #Rewards are a simple int + #Adversaries are given rewards def adversary_reward(self, agent, world): # Rewarded based on proximity to the goal landmark shaped_reward = True @@ -118,6 +119,7 @@ def adversary_reward(self, agent, world): return adv_rew + #What is passed to the agent ie How they see the world def observation(self, agent, world): # get positions of all entities in this agent's reference frame entity_pos = [] From 8a6cad26adef142c37ae83075faf1d0ae1325f38 Mon Sep 17 00:00:00 2001 From: zrysnd <43715612+zrysnd@users.noreply.github.com> Date: Sat, 20 Jul 2019 13:20:05 -0400 Subject: [PATCH 19/56] Path for "scenario.py" in the documentation --- documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation.md b/documentation.md index 4cb09cec2..a72097df1 100644 --- a/documentation.md +++ b/documentation.md @@ -4,7 +4,7 @@ In a simulation with `n` agents: 1. bin/script.py loads - acts as main script 2. Loads scenario - - `./multiagent/scenarios/scenario.py.make_world()` + - `./multiagent/scenario.py.make_world()` 3. Loads multi-agent enviroment given scenario settings and world - `./multiagent/environment.py.MultiAgentEnv(Scenario.world())` 4. 
Renders environment (initial render) From fcbd86ba8989cd736fb3c34c2d535ce92e6e9691 Mon Sep 17 00:00:00 2001 From: Brin775 <43180128+Brin775@users.noreply.github.com> Date: Sat, 20 Jul 2019 15:11:59 -0400 Subject: [PATCH 20/56] More info on simple_crypto.py --- changes.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/changes.txt b/changes.txt index 5e1d094fa..95529c034 100644 --- a/changes.txt +++ b/changes.txt @@ -12,5 +12,12 @@ environment.py: simple_crypto.py: - line 121: array in conditional can potentially be a boolean rather than an ndarray so it will have no .all() method + - Similar statements appear on lines 104, 109 - line 122: the sizes in the expression `agent.state.c - agent.goal_a.color` are mismatched and it is unclear where they - come from \ No newline at end of file + come from + - Similar statements appear on lines 107, 112 +- [Austen] I got it to run by removing the .all() calls and changing lines like + np.sum(np.square(agent.state.c - agent.goal_a.color)) + to + np.square(len(agent.state.c)-len(agent.goal_a.color)) + Not sure if scenario still functions correctly / if reward calculations are accurate From b7ceac72940d001fb0f1546d324412d303a5fe27 Mon Sep 17 00:00:00 2001 From: zrysnd <43715612+zrysnd@users.noreply.github.com> Date: Sat, 20 Jul 2019 15:36:48 -0400 Subject: [PATCH 21/56] More on multiagent/core.py --- documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation.md b/documentation.md index a72097df1..11ed36a06 100644 --- a/documentation.md +++ b/documentation.md @@ -66,7 +66,7 @@ You can create new scenarios by implementing the first 4 functions above (`make_ ## Miscellaneous -- `./multiagent/core.py`: contains classes for various objects (Entities, Landmarks, Agents, etc.) that are used throughout the code. +- `./multiagent/core.py`: contains classes for various objects (Entities, Landmarks, Agents, etc.) that are used throughout the code.(used for creating a scenario. We might need customized entities, agents for our own scenarios.) - `./multiagent/rendering.py`: used for displaying agent behaviors on the screen. From 4314fb66f15b9aa10e61161415fa5e6bf766ca0d Mon Sep 17 00:00:00 2001 From: jarbus Date: Sat, 20 Jul 2019 16:06:28 -0400 Subject: [PATCH 22/56] compatible? 
--- multiagent/rendering.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/multiagent/rendering.py b/multiagent/rendering.py index 3962d8a04..d72f1b98e 100644 --- a/multiagent/rendering.py +++ b/multiagent/rendering.py @@ -11,7 +11,7 @@ os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib' # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite -from gym.utils import reraise +#from gym.utils import reraise from gym import error try: @@ -353,4 +353,4 @@ def close(self): self.window.close() self.isopen = False def __del__(self): - self.close() \ No newline at end of file + self.close() From 28de1ad212865d5546af8952aab1ab891634353f Mon Sep 17 00:00:00 2001 From: Devak Patel Date: Sat, 20 Jul 2019 16:22:41 -0400 Subject: [PATCH 23/56] Added race scenario --- ScenarioIdeas.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md index b2fa57b3d..ca603279a 100644 --- a/ScenarioIdeas.md +++ b/ScenarioIdeas.md @@ -3,9 +3,9 @@ Generated at: https://www.tablesgenerator.com/markdown_tables | | Possible Actions | Rewards per Outcome | Properties of other entities | Nash Equilibrium | Other Notes | -|----|-----------------------|--------------------------------------------------------------------------------------------------------------------------|-------------------------------------|------------------|-------------| +|----|-----------------------|--------------------------------------------------------------------------------------------------------------------------|-------------------------------------|-------------------------|-------------| | #1 | Expand, attack, trade | Expanding + attacking spends resources for greater resource bonuses later. Trading gives bonus resources for both agents | No other entities other than agents | Attack | | -| #2 | | | | | | +| #2 | Move x steps | Agents try to close distance to flag | No other entities other than agents | Move as far as possible | | | #3 | | | | | | | #4 | | | | | | | #5 | | | | | | @@ -21,4 +21,5 @@ If A attacks B, spending 5 resources; B attacks A spending 6 resources, B takes ### Possible expansion: Add defend action, which blocks attack, but opponent agent gains bigger bonus resource if they try to trade. - +## Idea 2. Race +2D plane where agents try to race to their landmark. Agents can take any x number of steps to advance to the landmark. If the sum of all the steps taken by the agents(y) exceeds z, then all agents that moved get moved backwards w steps. Agents are rewarded for reaching the landmark. From dd886dd9ac7698ea933f1636ba944cd8905fce62 Mon Sep 17 00:00:00 2001 From: Devak Patel Date: Sat, 20 Jul 2019 16:23:31 -0400 Subject: [PATCH 24/56] Update ScenarioIdeas.md --- ScenarioIdeas.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md index ca603279a..795d9709b 100644 --- a/ScenarioIdeas.md +++ b/ScenarioIdeas.md @@ -22,4 +22,4 @@ If A attacks B, spending 5 resources; B attacks A spending 6 resources, B takes Add defend action, which blocks attack, but opponent agent gains bigger bonus resource if they try to trade. ## Idea 2. Race -2D plane where agents try to race to their landmark. Agents can take any x number of steps to advance to the landmark. If the sum of all the steps taken by the agents(y) exceeds z, then all agents that moved get moved backwards w steps. Agents are rewarded for reaching the landmark. +2D plane where agents try to race to their landmark. 
Agents can take any x number of steps to advance to the landmark. If the sum of all the steps taken by the agents(y) exceeds z, then all agents that moved get moved backwards w steps. Agents are rewarded for reaching the landmark. Parameters y, z, and the initial distance for each agent to the landmark can be varied for balance and to compare agent behavior. From 77e94869237f1ad08be6614e0265af4860f04209 Mon Sep 17 00:00:00 2001 From: dpakalarry Date: Sat, 20 Jul 2019 16:38:24 -0400 Subject: [PATCH 25/56] Update testing.py --- multiagent/scenarios/testing.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/multiagent/scenarios/testing.py b/multiagent/scenarios/testing.py index 9a46a9817..70c448c14 100644 --- a/multiagent/scenarios/testing.py +++ b/multiagent/scenarios/testing.py @@ -8,11 +8,11 @@ class Scenario(BaseScenario): def make_world(self): world = World() #World has agents and landmarks # set any world properties first - world.dim_c = 2 - num_agents = 5 + world.dim_c = 0 + num_agents = 2 #Change this to add agents world.num_agents = num_agents num_adversaries = 0 - num_landmarks = num_agents - 1 + num_landmarks = num_agents # add agents world.agents = [Agent() for i in range(num_agents)] for i, agent in enumerate(world.agents): @@ -45,14 +45,14 @@ def reset_world(self, world): goal.color = np.array([0.15, 0.65, 0.15]) for agent in world.agents: agent.goal_a = goal - # set random initial states + # set random initial states TODO: Initialize agents + landmarks to set positions with 0 velocity for agent in world.agents: agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - agent.state.p_vel = np.zeros(world.dim_p) + agent.state.p_vel = 0 agent.state.c = np.zeros(world.dim_c) for i, landmark in enumerate(world.landmarks): landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) - landmark.state.p_vel = np.zeros(world.dim_p) + landmark.state.p_vel = 0 def benchmark_data(self, agent, world): # returns data for benchmarking purposes @@ -77,7 +77,7 @@ def reward(self, agent, world): # Agents are rewarded based on minimum agent distance to each landmark return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - def agent_reward(self, agent, world): + def agent_reward(self, agent, world): #TODO: set reward to distance to goal landmark, remove adversary stuff # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it shaped_reward = True shaped_adv_reward = True @@ -129,7 +129,7 @@ def observation(self, agent, world): entity_color = [] for entity in world.landmarks: entity_color.append(entity.color) - # communication of all other agents + # communication of all other Agents TODO: remove communication other_pos = [] for other in world.agents: if other is agent: continue From f643ac2a893670a1ae5dc4129176dd51ec760312 Mon Sep 17 00:00:00 2001 From: dpakalarry Date: Sat, 20 Jul 2019 16:55:53 -0400 Subject: [PATCH 26/56] Update testing.py --- multiagent/scenarios/testing.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/multiagent/scenarios/testing.py b/multiagent/scenarios/testing.py index 70c448c14..3ed98c815 100644 --- a/multiagent/scenarios/testing.py +++ b/multiagent/scenarios/testing.py @@ -40,18 +40,15 @@ def reset_world(self, world): # random properties for landmarks for i, landmark in enumerate(world.landmarks): landmark.color = np.array([0.15, 0.15, 0.15]) - # set goal landmark - goal = 
np.random.choice(world.landmarks) - goal.color = np.array([0.15, 0.65, 0.15]) for agent in world.agents: agent.goal_a = goal # set random initial states TODO: Initialize agents + landmarks to set positions with 0 velocity - for agent in world.agents: - agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p) + for i, agent in enumerate(world.agents): + agent.state.p_pos = np.array([i/2,0]) agent.state.p_vel = 0 agent.state.c = np.zeros(world.dim_c) for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p) + landmark.state.p_pos = np.array([i,5]) landmark.state.p_vel = 0 def benchmark_data(self, agent, world): @@ -69,15 +66,16 @@ def benchmark_data(self, agent, world): def good_agents(self, world): return [agent for agent in world.agents if not agent.adversary] - # return all adversarial agents - def adversaries(self, world): - return [agent for agent in world.agents if agent.adversary] + # # return all adversarial agents + # def adversaries(self, world): + # return [agent for agent in world.agents if agent.adversary] def reward(self, agent, world): + return self.agent_reward(agent,world) # Agents are rewarded based on minimum agent distance to each landmark - return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) + # return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - def agent_reward(self, agent, world): #TODO: set reward to distance to goal landmark, remove adversary stuff + def agent_reward(self, agent, world): #TODO: set reward to distance to their landmark, remove adversary stuff # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it shaped_reward = True shaped_adv_reward = True From bd12a438ae017ce17d87cf080a952b441e24e77e Mon Sep 17 00:00:00 2001 From: linlinbest <444053358@qq.com> Date: Sat, 20 Jul 2019 17:23:23 -0400 Subject: [PATCH 27/56] Modified policy.py so that agents can go to the landmark automatically. No learning algorithms implemented yet. 
--- multiagent/policy.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/multiagent/policy.py b/multiagent/policy.py index cf9ad0e1b..8d70f9aef 100644 --- a/multiagent/policy.py +++ b/multiagent/policy.py @@ -1,6 +1,8 @@ import numpy as np from pyglet.window import key +from multiagent.scenarios.simple import Scenario + # individual agent policy class Policy(object): def __init__(self): @@ -14,6 +16,7 @@ class InteractivePolicy(Policy): def __init__(self, env, agent_index): super(InteractivePolicy, self).__init__() self.env = env + #self.agent_index = agent_index # hard-coded keyboard events self.move = [False for i in range(4)] self.comm = [False for i in range(env.world.dim_c)] @@ -23,6 +26,28 @@ def __init__(self, env, agent_index): def action(self, obs): # ignore observation and just act based on keyboard events + + + #x_axis = self.env.agents[self.agent_index].state.p_pos[0] + #y_axis = self.env.agents[self.agent_index].state.p_pos[1] + + if obs[2] < 0: + self.move[1] = True + elif obs[2] > 0: + self.move[0] = True + else: + self.move[0] = False + self.move[1] = False + + if obs[3] > 0: + self.move[3] = True + elif obs[3] < 0: + self.move[2] = True + else: + self.move[2] = False + self.move[3] = False + + if self.env.discrete_action_input: u = 0 if self.move[0]: u = 1 @@ -31,12 +56,12 @@ def action(self, obs): if self.move[3]: u = 3 else: u = np.zeros(5) # 5-d because of no-move action - if self.move[0]: u[1] += 1.0 - if self.move[1]: u[2] += 1.0 - if self.move[3]: u[3] += 1.0 - if self.move[2]: u[4] += 1.0 + if self.move[0]: u[1] += 0.01 + if self.move[1]: u[2] += 0.01 + if self.move[3]: u[3] += 0.01 + if self.move[2]: u[4] += 0.01 if True not in self.move: - u[0] += 1.0 + u[0] += 0.01 return np.concatenate([u, np.zeros(self.env.world.dim_c)]) # keyboard event callbacks From f26f54c84adda56c96be5b162c9a7abe36a6b08f Mon Sep 17 00:00:00 2001 From: zrysnd <43715612+zrysnd@users.noreply.github.com> Date: Sat, 20 Jul 2019 17:37:02 -0400 Subject: [PATCH 28/56] more details --- documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation.md b/documentation.md index 11ed36a06..c4721afe8 100644 --- a/documentation.md +++ b/documentation.md @@ -62,7 +62,7 @@ A BaseScenario `multiagent/scenario.py` incorporates at least `make_world()` and 4) `observation(agent, world)`: defines the observation space of a given agent 5) (optional) `benchmark_data()`: provides diagnostic data for policies trained on the environment (e.g. evaluation metrics) -You can create new scenarios by implementing the first 4 functions above (`make_world()`, `reset_world()`, `reward()`, and `observation()`). +You can create new scenarios by implementing the first 4 functions above (`make_world()`, `reset_world()`, `reward()`, and `observation()`), and have to keep the same function signature(can't not change parameters). 
## Miscellaneous From ab27aaa82c3763b3d14242f4736ccd285991671c Mon Sep 17 00:00:00 2001 From: zrysnd <43715612+zrysnd@users.noreply.github.com> Date: Sat, 20 Jul 2019 17:39:33 -0400 Subject: [PATCH 29/56] more details --- documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation.md b/documentation.md index c4721afe8..4cdb2c007 100644 --- a/documentation.md +++ b/documentation.md @@ -62,7 +62,7 @@ A BaseScenario `multiagent/scenario.py` incorporates at least `make_world()` and 4) `observation(agent, world)`: defines the observation space of a given agent 5) (optional) `benchmark_data()`: provides diagnostic data for policies trained on the environment (e.g. evaluation metrics) -You can create new scenarios by implementing the first 4 functions above (`make_world()`, `reset_world()`, `reward()`, and `observation()`), and have to keep the same function signature(can't not change parameters). +You can create new scenarios by implementing the first 4 functions above (`make_world()`, `reset_world()`, `reward()`, and `observation()`), and have to keep the same function signature(can't not change parameters), unless we all make changes to multiagent/environment. ## Miscellaneous From b1928194851c965cc2615fcaaa644f7f92dce5e5 Mon Sep 17 00:00:00 2001 From: zrysnd <503591415@qq.com> Date: Sat, 20 Jul 2019 17:41:59 -0400 Subject: [PATCH 30/56] added reward and observation function to BaseScenario --- multiagent/scenario.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/multiagent/scenario.py b/multiagent/scenario.py index 02d86773e..1718ea9bf 100644 --- a/multiagent/scenario.py +++ b/multiagent/scenario.py @@ -8,3 +8,7 @@ def make_world(self): # create initial conditions of the world def reset_world(self, world): raise NotImplementedError() + def reward(self, agent, world): + raise NotImplementedError() + def observation(self, agent, world): + raise NotImplementedError() From 51e2cb83b3c5c99d0313ebff6ea4b74d223e7b1c Mon Sep 17 00:00:00 2001 From: zrysnd <503591415@qq.com> Date: Sat, 20 Jul 2019 18:09:43 -0400 Subject: [PATCH 31/56] a customized scenario --- multiagent/scenarios/cus.py | 53 +++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 multiagent/scenarios/cus.py diff --git a/multiagent/scenarios/cus.py b/multiagent/scenarios/cus.py new file mode 100644 index 000000000..7e169701a --- /dev/null +++ b/multiagent/scenarios/cus.py @@ -0,0 +1,53 @@ +import numpy as np +from multiagent.core import World, Agent, Landmark +from multiagent.scenario import BaseScenario + +class Scenario(BaseScenario): + def make_world(self): + world = World() + # add agents + world.agents = [Agent() for i in range(1)] + for i, agent in enumerate(world.agents): + agent.name = 'agent %d' % i + agent.collide = False + agent.silent = True + # add landmarks + world.landmarks = [Landmark() for i in range(1)] + for i, landmark in enumerate(world.landmarks): + landmark.name = 'landmark %d' % i + landmark.collide = False + landmark.movable = False + # make initial conditions + self.reset_world(world) + return world + + def reset_world(self, world): + # random properties for agents + for i, agent in enumerate(world.agents): + agent.color = np.array([0.25,0.25,0.25]) + # random properties for landmarks + for i, landmark in enumerate(world.landmarks): + landmark.color = np.array([0.75,0.75,0.75]) + world.landmarks[0].color = np.array([0.75,0.25,0.25]) + # set random initial states + for agent in world.agents: + agent.state.p_pos = 
np.random.uniform(-1,+1, world.dim_p) + agent.state.p_vel = np.zeros(world.dim_p) + agent.state.c = np.zeros(world.dim_c) + for i, landmark in enumerate(world.landmarks): + landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) + landmark.state.p_vel = np.zeros(world.dim_p) + + def reward(self, agent, world): + # dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos)) + # dist2 = world.landmarks[0].state.p_pos + delta_pos = agent.state.p_pos - world.landmarks[0].state.p_pos + dist = np.sqrt(np.sum(np.square(delta_pos))) + return dist + + def observation(self, agent, world): + # get positions of all entities in this agent's reference frame + entity_pos = [] + for entity in world.landmarks: + entity_pos.append(entity.state.p_pos - agent.state.p_pos) + return np.concatenate([agent.state.p_vel] + entity_pos) From ee9d0687efac7632252f1f65da4413363117fa0c Mon Sep 17 00:00:00 2001 From: zrysnd <503591415@qq.com> Date: Sat, 20 Jul 2019 18:20:08 -0400 Subject: [PATCH 32/56] adding dictionary --- multiagent/scenarios/cus.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/multiagent/scenarios/cus.py b/multiagent/scenarios/cus.py index 7e169701a..0b65e0342 100644 --- a/multiagent/scenarios/cus.py +++ b/multiagent/scenarios/cus.py @@ -3,6 +3,10 @@ from multiagent.scenario import BaseScenario class Scenario(BaseScenario): + def __init__(self): + super(Scenario, self).__init__() + self.agentsToLandMarks = None + def make_world(self): world = World() # add agents From e2f2fdeb19052ceb0c1a2a18b4f18b8e9e19d74f Mon Sep 17 00:00:00 2001 From: zrysnd <503591415@qq.com> Date: Sat, 20 Jul 2019 18:34:54 -0400 Subject: [PATCH 33/56] remards based on distance between agent and its target --- multiagent/scenarios/cus.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/multiagent/scenarios/cus.py b/multiagent/scenarios/cus.py index 0b65e0342..4de51ce58 100644 --- a/multiagent/scenarios/cus.py +++ b/multiagent/scenarios/cus.py @@ -5,22 +5,27 @@ class Scenario(BaseScenario): def __init__(self): super(Scenario, self).__init__() - self.agentsToLandMarks = None - + self.agentsToLandMarks = {} + def make_world(self): world = World() # add agents - world.agents = [Agent() for i in range(1)] + numberOfAgents = 2; + world.agents = [Agent() for i in range(numberOfAgents)] for i, agent in enumerate(world.agents): agent.name = 'agent %d' % i agent.collide = False agent.silent = True # add landmarks - world.landmarks = [Landmark() for i in range(1)] + world.landmarks = [Landmark() for i in range(numberOfAgents)] for i, landmark in enumerate(world.landmarks): landmark.name = 'landmark %d' % i landmark.collide = False landmark.movable = False + #fill in the dictionary + for i in range(numberOfAgents): + self.agentsToLandMarks.update({ world.agents[i]: world.landmarks[i] }) + # make initial conditions self.reset_world(world) return world @@ -45,7 +50,7 @@ def reset_world(self, world): def reward(self, agent, world): # dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos)) # dist2 = world.landmarks[0].state.p_pos - delta_pos = agent.state.p_pos - world.landmarks[0].state.p_pos + delta_pos = agent.state.p_pos - self.agentsToLandMarks[agent].state.p_pos dist = np.sqrt(np.sum(np.square(delta_pos))) return dist From 54d39c6b8b3560a579960551b477e295c42c0b72 Mon Sep 17 00:00:00 2001 From: zrysnd <503591415@qq.com> Date: Sat, 20 Jul 2019 18:38:22 -0400 Subject: [PATCH 34/56] minor changes --- multiagent/scenarios/cus.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/multiagent/scenarios/cus.py b/multiagent/scenarios/cus.py index 4de51ce58..8b53fff0d 100644 --- a/multiagent/scenarios/cus.py +++ b/multiagent/scenarios/cus.py @@ -10,7 +10,7 @@ def __init__(self): def make_world(self): world = World() # add agents - numberOfAgents = 2; + numberOfAgents = 1; world.agents = [Agent() for i in range(numberOfAgents)] for i, agent in enumerate(world.agents): agent.name = 'agent %d' % i From 883ccaf378d0b051962538ce6b0b8d345e13d5af Mon Sep 17 00:00:00 2001 From: zrysnd <503591415@qq.com> Date: Sat, 20 Jul 2019 18:39:52 -0400 Subject: [PATCH 35/56] leave policy unchanged for now --- multiagent/policy.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/multiagent/policy.py b/multiagent/policy.py index 8d70f9aef..ff2e8997a 100644 --- a/multiagent/policy.py +++ b/multiagent/policy.py @@ -31,21 +31,21 @@ def action(self, obs): #x_axis = self.env.agents[self.agent_index].state.p_pos[0] #y_axis = self.env.agents[self.agent_index].state.p_pos[1] - if obs[2] < 0: - self.move[1] = True - elif obs[2] > 0: - self.move[0] = True - else: - self.move[0] = False - self.move[1] = False + # if obs[2] < 0: + # self.move[1] = True + # elif obs[2] > 0: + # self.move[0] = True + # else: + # self.move[0] = False + # self.move[1] = False - if obs[3] > 0: - self.move[3] = True - elif obs[3] < 0: - self.move[2] = True - else: - self.move[2] = False - self.move[3] = False + # if obs[3] > 0: + # self.move[3] = True + # elif obs[3] < 0: + # self.move[2] = True + # else: + # self.move[2] = False + # self.move[3] = False if self.env.discrete_action_input: From 4753e06700f42d5fb8406b90a00156c399734d41 Mon Sep 17 00:00:00 2001 From: zrysnd <503591415@qq.com> Date: Sat, 20 Jul 2019 18:44:00 -0400 Subject: [PATCH 36/56] environment no longer printing message, leave printing in script --- multiagent/environment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiagent/environment.py b/multiagent/environment.py index 69efccde1..86df92de0 100644 --- a/multiagent/environment.py +++ b/multiagent/environment.py @@ -210,7 +210,7 @@ def render(self, mode='human'): else: word = alphabet[np.argmax(other.state.c)] message += (other.name + ' to ' + agent.name + ': ' + word + ' ') - print(message) + # print(message) for i in range(len(self.viewers)): # create viewers (if necessary) From b15177410a3d509017ba1d99ec57bca1136bf3ad Mon Sep 17 00:00:00 2001 From: zrysnd <503591415@qq.com> Date: Sat, 20 Jul 2019 18:53:55 -0400 Subject: [PATCH 37/56] agent landmark position fixed --- multiagent/scenarios/cus.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/multiagent/scenarios/cus.py b/multiagent/scenarios/cus.py index 8b53fff0d..20c254caf 100644 --- a/multiagent/scenarios/cus.py +++ b/multiagent/scenarios/cus.py @@ -10,7 +10,7 @@ def __init__(self): def make_world(self): world = World() # add agents - numberOfAgents = 1; + numberOfAgents = 2; world.agents = [Agent() for i in range(numberOfAgents)] for i, agent in enumerate(world.agents): agent.name = 'agent %d' % i @@ -36,15 +36,17 @@ def reset_world(self, world): agent.color = np.array([0.25,0.25,0.25]) # random properties for landmarks for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.75,0.75,0.75]) + landmark.color = np.array([0.75,0.25,0.25]) world.landmarks[0].color = np.array([0.75,0.25,0.25]) # set random initial states - for agent in world.agents: - agent.state.p_pos = 
np.random.uniform(-1,+1, world.dim_p) + for i,agent in enumerate(world.agents): + # agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) + agent.state.p_pos = np.array([i/2,0]) agent.state.p_vel = np.zeros(world.dim_p) agent.state.c = np.zeros(world.dim_c) for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) + # landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) + landmark.state.p_pos = np.array([i/2,0.75]) landmark.state.p_vel = np.zeros(world.dim_p) def reward(self, agent, world): From 1ff2b05785138ad422dbf412c9ff50cd948847cd Mon Sep 17 00:00:00 2001 From: zrysnd <503591415@qq.com> Date: Sat, 20 Jul 2019 18:57:19 -0400 Subject: [PATCH 38/56] more reward closer --- multiagent/scenarios/cus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiagent/scenarios/cus.py b/multiagent/scenarios/cus.py index 20c254caf..c32da49e2 100644 --- a/multiagent/scenarios/cus.py +++ b/multiagent/scenarios/cus.py @@ -54,7 +54,7 @@ def reward(self, agent, world): # dist2 = world.landmarks[0].state.p_pos delta_pos = agent.state.p_pos - self.agentsToLandMarks[agent].state.p_pos dist = np.sqrt(np.sum(np.square(delta_pos))) - return dist + return -dist def observation(self, agent, world): # get positions of all entities in this agent's reference frame From 8fbb1bf421ad6875c497acccfe387ed41b397594 Mon Sep 17 00:00:00 2001 From: zrysnd <43715612+zrysnd@users.noreply.github.com> Date: Sat, 20 Jul 2019 19:04:22 -0400 Subject: [PATCH 39/56] documenting visualization --- documentation.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/documentation.md b/documentation.md index 4cdb2c007..731e40296 100644 --- a/documentation.md +++ b/documentation.md @@ -70,3 +70,8 @@ You can create new scenarios by implementing the first 4 functions above (`make_ - `./multiagent/rendering.py`: used for displaying agent behaviors on the screen. +## Visualization: + +1.Each agent will have one corresponding window generated for it, agents always locate at the center of the camera in its own wondow. +2.In the interactive policy, pressing -> will make the agent go left in the world, but everything else goes right in its window(since it's also at the center of its own window). + From c358b4b40489e559f39e523be8a213fa09160582 Mon Sep 17 00:00:00 2001 From: zrysnd <43715612+zrysnd@users.noreply.github.com> Date: Sat, 20 Jul 2019 19:06:37 -0400 Subject: [PATCH 40/56] documenting visualization --- documentation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation.md b/documentation.md index 731e40296..f7e697ccf 100644 --- a/documentation.md +++ b/documentation.md @@ -72,6 +72,6 @@ You can create new scenarios by implementing the first 4 functions above (`make_ ## Visualization: -1.Each agent will have one corresponding window generated for it, agents always locate at the center of the camera in its own wondow. -2.In the interactive policy, pressing -> will make the agent go left in the world, but everything else goes right in its window(since it's also at the center of its own window). +1. Each agent will have one corresponding window generated for itself, agents always locate at the center of the camera in its own wondow. +2. In the interactive policy, pressing -> will make the agent go left in the world, but everything else goes right in its own window(since it's always at the center of its own window). 
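For reference, the per-agent windows described above come from giving the environment one viewer per agent instead of a shared one. The sketch below is based on the `interactive_tmp.py` script added later in this series; the `simple.py` scenario name is just the script's default, and reading `shared_viewer=False` as the thing that produces the separate agent-centered windows is an inference from that script rather than something stated in the docs.

```
from multiagent.environment import MultiAgentEnv
from multiagent.policy import InteractivePolicy
import multiagent.scenarios as scenarios

scenario = scenarios.load('simple.py').Scenario()
world = scenario.make_world()
# shared_viewer=False -> one window per agent, each centered on its own agent
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation,
                    info_callback=None, shared_viewer=False)
env.render()
# each InteractivePolicy registers its key handlers on that agent's window
policies = [InteractivePolicy(env, i) for i in range(env.n)]
```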
From 2c2d347f3cc618d4be161bef7b6b7178b23301ad Mon Sep 17 00:00:00 2001 From: Brin775 <43180128+Brin775@users.noreply.github.com> Date: Sun, 21 Jul 2019 13:02:52 -0400 Subject: [PATCH 41/56] Added race.py (not finished) --- multiagent/scenarios/race.py | 59 ++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 multiagent/scenarios/race.py diff --git a/multiagent/scenarios/race.py b/multiagent/scenarios/race.py new file mode 100644 index 000000000..f4a21b57f --- /dev/null +++ b/multiagent/scenarios/race.py @@ -0,0 +1,59 @@ +import numpy as np +from multiagent.core import World, Agent, Landmark +from multiagent.scenario import BaseScenario + +class Scenario(BaseScenario): + def make_world(self): + world = World() + # add agents + world.agents = [Agent() for i in range(2)] + for i, agent in enumerate(world.agents): + agent.name = 'agent %d' % i + agent.collide = False + agent.silent = True + # add landmarks + world.landmarks = [Landmark() for i in range(2)] + for i, landmark in enumerate(world.landmarks): + landmark.name = 'landmark %d' % i + landmark.collide = False + landmark.movable = False + # make initial conditions + self.reset_world(world) + return world + + def reset_world(self, world): + # random properties for agents + for i, agent in enumerate(world.agents): + agent.color = np.array([0.25,0.25,0.25]) + # random properties for landmarks + for i, landmark in enumerate(world.landmarks): + landmark.color = np.array([0.75,0.75,0.75]) + world.landmarks[0].color = np.array([0.75,0.25,0.25]) + world.landmarks[1].color = np.array([0.75,0.25,0.25]) + # set random initial states + #for agent in world.agents: + #agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) + world.agents[0].state.p_pos = np.array([0.0,0.0]) + world.agents[0].state.p_vel = np.zeros(world.dim_p) + world.agents[0].state.c = np.zeros(world.dim_c) + + world.agents[1].state.p_pos = np.array([0.5,0.0]) + world.agents[1].state.p_vel = np.zeros(world.dim_p) + world.agents[1].state.c = np.zeros(world.dim_c) + + for i, landmark in enumerate(world.landmarks): + #landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) + landmark.state.p_pos = np.array([0.0 + i*0.5, 5.0]) + landmark.state.p_vel = np.zeros(world.dim_p) + + def reward(self, agent, world): + #dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos)) + dist2 = np.sum(np.square(agent.state.p_pos - np.array([agent.state.p_pos[0], 5.0]))) + return -dist2 + + def observation(self, agent, world): + # get positions of all entities in this agent's reference frame + entity_pos = [] + for entity in world.landmarks: + entity_pos.append(entity.state.p_pos - agent.state.p_pos) + return np.concatenate([agent.state.p_vel] + entity_pos) From 927f504a13883123554f4e18abf398a3e9e98605 Mon Sep 17 00:00:00 2001 From: jarbus Date: Sun, 21 Jul 2019 13:22:24 -0400 Subject: [PATCH 42/56] Tweak scenarioideas.md --- ScenarioIdeas.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md index 795d9709b..0e9f6169d 100644 --- a/ScenarioIdeas.md +++ b/ScenarioIdeas.md @@ -5,7 +5,7 @@ Generated at: https://www.tablesgenerator.com/markdown_tables | | Possible Actions | Rewards per Outcome | Properties of other entities | Nash Equilibrium | Other Notes | |----|-----------------------|--------------------------------------------------------------------------------------------------------------------------|-------------------------------------|-------------------------|-------------| | 
#1 | Expand, attack, trade | Expanding + attacking spends resources for greater resource bonuses later. Trading gives bonus resources for both agents | No other entities other than agents | Attack | | -| #2 | Move x steps | Agents try to close distance to flag | No other entities other than agents | Move as far as possible | | +| #2 | Move x steps | Reward = progress in last step | No other entities other than agents | Move as far as possible | | | #3 | | | | | | | #4 | | | | | | | #5 | | | | | | @@ -22,4 +22,7 @@ If A attacks B, spending 5 resources; B attacks A spending 6 resources, B takes Add defend action, which blocks attack, but opponent agent gains bigger bonus resource if they try to trade. ## Idea 2. Race -2D plane where agents try to race to their landmark. Agents can take any x number of steps to advance to the landmark. If the sum of all the steps taken by the agents(y) exceeds z, then all agents that moved get moved backwards w steps. Agents are rewarded for reaching the landmark. Parameters y, z, and the initial distance for each agent to the landmark can be varied for balance and to compare agent behavior. +2D plane where agents try to race to their landmark. Agents can take any x number of steps to advance to the landmark. If the sum of all the steps taken by the agents(y) exceeds z, then all agents that moved get moved backwards w steps. ~~Agents are rewarded for reaching the landmark~~ Agents are rewarded based off of how many steps they are able to take per turn. Parameters y, z, and the initial distance for each agent to the landmark can be varied for balance and to compare agent behavior. + +- The landmarks don't actually get taken into account for the rewards or observation, it's simply a visualization of how much progess each agent is able to make. +- Agents will either have to be moved by the scenario via physics, or they can move based off of the reward recieved on their next action, the following turn. From 2e9d4ec759d533494fc8fc52046da7fea7df90e1 Mon Sep 17 00:00:00 2001 From: jarbus Date: Sun, 21 Jul 2019 14:26:11 -0400 Subject: [PATCH 43/56] race tweaks --- ScenarioIdeas.md | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md index 0e9f6169d..82e8e2e58 100644 --- a/ScenarioIdeas.md +++ b/ScenarioIdeas.md @@ -2,13 +2,14 @@ Generated at: https://www.tablesgenerator.com/markdown_tables -| | Possible Actions | Rewards per Outcome | Properties of other entities | Nash Equilibrium | Other Notes | -|----|-----------------------|--------------------------------------------------------------------------------------------------------------------------|-------------------------------------|-------------------------|-------------| -| #1 | Expand, attack, trade | Expanding + attacking spends resources for greater resource bonuses later. 
Trading gives bonus resources for both agents | No other entities other than agents | Attack | | -| #2 | Move x steps | Reward = progress in last step | No other entities other than agents | Move as far as possible | | -| #3 | | | | | | -| #4 | | | | | | -| #5 | | | | | | +| | Possible Actions | Rewards per Outcome | Properties of other entities | Nash Equilibrium | Other Notes | +| ---- | ----------------------- | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------- | ------------------------- | ------------- | +| #1 | Expand, attack, trade | Expanding + attacking spends resources for greater resource bonuses later. | No other entities other than agents | Attack | | +| | | Trading gives bonus resources for both agents | | | | +| #2 | Move x steps | Reward = progress in last step | No other entities other than agents | Move as far as possible | | +| #3 | | | | | | +| #4 | | | | | | +| #5 | | | | | | # Details: ## Idea 1. (Risk but on a grid) @@ -22,7 +23,21 @@ If A attacks B, spending 5 resources; B attacks A spending 6 resources, B takes Add defend action, which blocks attack, but opponent agent gains bigger bonus resource if they try to trade. ## Idea 2. Race -2D plane where agents try to race to their landmark. Agents can take any x number of steps to advance to the landmark. If the sum of all the steps taken by the agents(y) exceeds z, then all agents that moved get moved backwards w steps. ~~Agents are rewarded for reaching the landmark~~ Agents are rewarded based off of how many steps they are able to take per turn. Parameters y, z, and the initial distance for each agent to the landmark can be varied for balance and to compare agent behavior. -- The landmarks don't actually get taken into account for the rewards or observation, it's simply a visualization of how much progess each agent is able to make. +#### World +- 2D plane where agents try to race to their landmark. +- Agents only move forward/backward parallel to each other +- All agents start from the same location + +#### Rules +- Agents can take any x number of steps to advance to the landmark. +- If the sum of all the steps taken by the agents(y) exceeds z, then all agents that moved get moved backwards w steps. +- Agents are rewarded based off of how many steps they are successfully able to take per turn. + - Say each agent tries to take 10 steps, but because too many agents are trying to move in a turn, they all get moved back 5 steps. In this case, their reward would be -5, even though they tried to take 10 steps. + +#### Variables +- Parameters y, z, and the initial distance for each agent to the landmark can be varied for balance and to compare agent behavior. + +#### Notes +- The landmarks don't actually get taken into account for the rewards or observation, it's simply aiding visualization of how much progess each agent is able to make. - Agents will either have to be moved by the scenario via physics, or they can move based off of the reward recieved on their next action, the following turn. 
From 2c178b702640e6eeefeb7d0b6fd77797fbadf627 Mon Sep 17 00:00:00 2001 From: dpakalarry Date: Mon, 22 Jul 2019 16:58:28 -0400 Subject: [PATCH 44/56] Commenting --- multiagent/scenarios/testing.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/multiagent/scenarios/testing.py b/multiagent/scenarios/testing.py index 3ed98c815..80324dc0a 100644 --- a/multiagent/scenarios/testing.py +++ b/multiagent/scenarios/testing.py @@ -71,6 +71,7 @@ def good_agents(self, world): # return [agent for agent in world.agents if agent.adversary] def reward(self, agent, world): + return np.sum(np.s) return self.agent_reward(agent,world) # Agents are rewarded based on minimum agent distance to each landmark # return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) @@ -80,15 +81,15 @@ def agent_reward(self, agent, world): #TODO: set reward to distance to their l shaped_reward = True shaped_adv_reward = True - # Calculate negative reward for adversary - adversary_agents = self.adversaries(world) - if shaped_adv_reward: # distance-based adversary reward - adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents]) - else: # proximity-based adversary reward (binary) - adv_rew = 0 - for a in adversary_agents: - if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size: - adv_rew -= 5 + # # Calculate negative reward for adversary + # adversary_agents = self.adversaries(world) + # if shaped_adv_reward: # distance-based adversary reward + # adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents]) + # else: # proximity-based adversary reward (binary) + # adv_rew = 0 + # for a in adversary_agents: + # if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size: + # adv_rew -= 5 # Calculate positive reward for agents good_agents = self.good_agents(world) From 4f2b97c6a5767733b4527e87a185831d4c46bc67 Mon Sep 17 00:00:00 2001 From: dpakalarry Date: Mon, 22 Jul 2019 20:00:44 -0400 Subject: [PATCH 45/56] Setup testing.py for the scenario --- multiagent/scenarios/testing.py | 108 +++++++++++++++++--------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/multiagent/scenarios/testing.py b/multiagent/scenarios/testing.py index 80324dc0a..3cefa0755 100644 --- a/multiagent/scenarios/testing.py +++ b/multiagent/scenarios/testing.py @@ -4,7 +4,7 @@ class Scenario(BaseScenario): - + goalDist = 5.0; def make_world(self): world = World() #World has agents and landmarks # set any world properties first @@ -48,7 +48,7 @@ def reset_world(self, world): agent.state.p_vel = 0 agent.state.c = np.zeros(world.dim_c) for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.array([i,5]) + landmark.state.p_pos = np.array([i,goalDist]) landmark.state.p_vel = 0 def benchmark_data(self, agent, world): @@ -70,69 +70,73 @@ def good_agents(self, world): # def adversaries(self, world): # return [agent for agent in world.agents if agent.adversary] + #Simplified to just distance from y = 5; def reward(self, agent, world): - return np.sum(np.s) - return self.agent_reward(agent,world) + alpha = 0.5 + return alpha * agent.state.p_pos[1] - (1-alpha) * 1/(world.num_agents-1)*sum([other.state.p_pos[1] for other in world.agents if other is not agent]) + #Right now + for distance - average of the distance covered by other agents. 
+ + + # return self.agent_reward(agent,world) # Agents are rewarded based on minimum agent distance to each landmark # return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - def agent_reward(self, agent, world): #TODO: set reward to distance to their landmark, remove adversary stuff - # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it - shaped_reward = True - shaped_adv_reward = True - - # # Calculate negative reward for adversary - # adversary_agents = self.adversaries(world) - # if shaped_adv_reward: # distance-based adversary reward - # adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents]) - # else: # proximity-based adversary reward (binary) - # adv_rew = 0 - # for a in adversary_agents: - # if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size: - # adv_rew -= 5 - - # Calculate positive reward for agents - good_agents = self.good_agents(world) - if shaped_reward: # distance-based agent reward - pos_rew = -min( - [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) - else: # proximity-based agent reward (binary) - pos_rew = 0 - if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \ - < 2 * agent.goal_a.size: - pos_rew += 5 - pos_rew -= min( - [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) - return pos_rew + adv_rew #Rewards are a simple int + # def agent_reward(self, agent, world): #TODO: set reward to distance to their landmark, remove adversary stuff + # # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it + # shaped_reward = True + # shaped_adv_reward = True + + # # # Calculate negative reward for adversary + # # adversary_agents = self.adversaries(world) + # # if shaped_adv_reward: # distance-based adversary reward + # # adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents]) + # # else: # proximity-based adversary reward (binary) + # # adv_rew = 0 + # # for a in adversary_agents: + # # if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size: + # # adv_rew -= 5 + + # # Calculate positive reward for agents + # good_agents = self.good_agents(world) + # if shaped_reward: # distance-based agent reward + # pos_rew = -min( + # [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) + # else: # proximity-based agent reward (binary) + # pos_rew = 0 + # if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \ + # < 2 * agent.goal_a.size: + # pos_rew += 5 + # pos_rew -= min( + # [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) + # return pos_rew + adv_rew #Rewards are a simple int #Adversaries are given rewards - def adversary_reward(self, agent, world): - # Rewarded based on proximity to the goal landmark - shaped_reward = True - if shaped_reward: # distance-based reward - return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) - else: # proximity-based reward (binary) - adv_rew = 0 - if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size: - adv_rew += 5 - return adv_rew + # def adversary_reward(self, agent, world): + # # Rewarded based on proximity to the goal landmark + # 
shaped_reward = True + # if shaped_reward: # distance-based reward + # return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) + # else: # proximity-based reward (binary) + # adv_rew = 0 + # if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size: + # adv_rew += 5 + # return adv_rew #What is passed to the agent ie How they see the world def observation(self, agent, world): # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: - entity_color.append(entity.color) - # communication of all other Agents TODO: remove communication + # entity_pos = [] + # for entity in world.landmarks: + # entity_pos.append(entity.state.p_pos - agent.state.p_pos) + entity_pos = [goalDist - agent.state.p_pos[1]] #Should only need the distance to it's own landmark goal + + + # communication of all other Agents other_pos = [] for other in world.agents: - if other is agent: continue - other_pos.append(other.state.p_pos - agent.state.p_pos) + # if other is agent: continue + other_pos.append(goalDist - other.state.p_pos[1]) #Agents know how far other agents are from their goals if not agent.adversary: return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos) From 7f59b61a0e0c5d3fa04b798e071df7d51c9edd0f Mon Sep 17 00:00:00 2001 From: dpakalarry Date: Mon, 22 Jul 2019 20:01:22 -0400 Subject: [PATCH 46/56] Added comments --- multiagent/scenarios/testing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/multiagent/scenarios/testing.py b/multiagent/scenarios/testing.py index 3cefa0755..51c2d4f20 100644 --- a/multiagent/scenarios/testing.py +++ b/multiagent/scenarios/testing.py @@ -4,7 +4,8 @@ class Scenario(BaseScenario): - goalDist = 5.0; + goalDist = 5.0; #Currently the distance to landmark + def make_world(self): world = World() #World has agents and landmarks # set any world properties first @@ -72,7 +73,7 @@ def good_agents(self, world): #Simplified to just distance from y = 5; def reward(self, agent, world): - alpha = 0.5 + alpha = 0.5 #Can be adjusted to determine whether individual performance, or ranked importance is more important [0,1] return alpha * agent.state.p_pos[1] - (1-alpha) * 1/(world.num_agents-1)*sum([other.state.p_pos[1] for other in world.agents if other is not agent]) #Right now + for distance - average of the distance covered by other agents. From 1f97b16821f66e98ac1e8fdc3a4946ab6585fbcd Mon Sep 17 00:00:00 2001 From: SimplySonder <46611486+SimplySonder@users.noreply.github.com> Date: Wed, 24 Jul 2019 12:28:47 -0400 Subject: [PATCH 47/56] Added Idea 3 to ScenarioIdeas.md Anthony added first draft of Hunger games Scenario --- ScenarioIdeas.md | 56 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md index 82e8e2e58..217c4e80a 100644 --- a/ScenarioIdeas.md +++ b/ScenarioIdeas.md @@ -7,7 +7,7 @@ Generated at: https://www.tablesgenerator.com/markdown_tables | #1 | Expand, attack, trade | Expanding + attacking spends resources for greater resource bonuses later. 
| No other entities other than agents | Attack | | | | | Trading gives bonus resources for both agents | | | | | #2 | Move x steps | Reward = progress in last step | No other entities other than agents | Move as far as possible | | -| #3 | | | | | | +| #3 | move, attack, loot, rest | Staying alive, get gear, kills, winning | No other entities other than agents | live and loot | | | #4 | | | | | | | #5 | | | | | | @@ -41,3 +41,57 @@ Add defend action, which blocks attack, but opponent agent gains bigger bonus re #### Notes - The landmarks don't actually get taken into account for the rewards or observation, it's simply aiding visualization of how much progess each agent is able to make. - Agents will either have to be moved by the scenario via physics, or they can move based off of the reward recieved on their next action, the following turn. + +## Idea 3. Hunger Games + +#### World +- 10 x 10 plane where agents try to be the last survivor +- 12 agents start equidistant from each other in a circle + - Middle of circle is high tier loot +- Set loot spawns with a set tier, but random loot +- Structures agents can enter and be hidden from sight + +#### Agent +- Main Attributes + - Attack Range + - Attack Power + - Def + - HP + - Stamina +- Choices: + - Loot + - Sight limited + - Attack an adjacent agent + - Additional attack options possible with certain loot + - Uses Stamina + - Move + - Walk one space + - Run 2x fast w/ Stamina + - Rest + - Recover HP/Stamina +#### Rewards +- Kills are not intrinsically rewarded +- Looting from chests/bodies result in a set reward value per tier/killcount, and additional reward from net stat gain +- Time alive gives slight reward with each tick +- Winning gives the highest reward + +#### Agent Variables +- Environment Knowledge + - Excludes: + Chest loot status + Chest loot items + Alive/Dead Enemy location +- Sight +- Self position +- Attributes +- Loot +- Kill count +- Kill counts of other agents +- List of Alive Agents +- List of Dead Agents +- Attributes of agents in Sight + +#### Notes +- Co-op can be implemented where agents spawn with a teammate they cannot attack, and exchange loot with. +- Combat can function similarly to D&D involving some RNG + From cbfea477097d6ecd4a22671519ec894ca8339e63 Mon Sep 17 00:00:00 2001 From: linlinbest <43051929+linlinbest@users.noreply.github.com> Date: Wed, 24 Jul 2019 12:56:14 -0400 Subject: [PATCH 48/56] Add files via upload The template for Q-learing algorithm in interactive_tmp.py, but there are still some bugs. 
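While those bugs get worked out, a generic tabular Q-learning update looks roughly like the sketch below; it mirrors what `interactive_tmp.py` and the earlier `simpleqlearning.py` appear to be aiming for, but the function name `q_update` and its arguments are assumptions for illustration, not code from this patch. One apparent difference from `simpleqlearning.py` is that the diff below reads the current Q-value without including the action index in the lookup.

```
import numpy as np

# Generic tabular Q-learning update (a sketch; names and defaults are illustrative).
# q_table is indexed by (discretized state..., action); s and s_next are tuples of ints.
def q_update(q_table, s, a, r, s_next, lr=0.1, discount=0.95):
    max_future_q = np.max(q_table[s_next])        # best value reachable from the next state
    current_q = q_table[s + (a,)]                 # note the action index in the lookup
    q_table[s + (a,)] = (1 - lr) * current_q + lr * (r + discount * max_future_q)
    return q_table
```

In the script below, `s` would presumably be the tuple returned by `get_discrete_state(obs_n[i])` and `a` the integer index of the action actually taken by agent `i`.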
--- .../gymMountainCarv0/interactive_tmp.py | 84 ++++++++++++++++++ agents_using_gym/gymMountainCarv0/policy.py | 88 +++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 agents_using_gym/gymMountainCarv0/interactive_tmp.py create mode 100644 agents_using_gym/gymMountainCarv0/policy.py diff --git a/agents_using_gym/gymMountainCarv0/interactive_tmp.py b/agents_using_gym/gymMountainCarv0/interactive_tmp.py new file mode 100644 index 000000000..858378dec --- /dev/null +++ b/agents_using_gym/gymMountainCarv0/interactive_tmp.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +import os,sys +sys.path.insert(1, os.path.join(sys.path[0], '..')) +import argparse + +from multiagent.environment import MultiAgentEnv +from multiagent.policy import InteractivePolicy +import multiagent.scenarios as scenarios + +import numpy as np + +if __name__ == '__main__': + # parse arguments + parser = argparse.ArgumentParser(description=None) + parser.add_argument('-s', '--scenario', default='simple.py', help='Path of the scenario Python script.') + args = parser.parse_args() + + # load scenario from script + scenario = scenarios.load(args.scenario).Scenario() + # create world + world = scenario.make_world() + # create multiagent environment + env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer = False) + # render call to create viewer window (necessary only for interactive policies) + env.render() + # create interactive policies for each agent + policies = [InteractivePolicy(env,i) for i in range(env.n)] + + LEARNING_RATE = 0.1 + DISCOUNT = 0.95 + EPISODE = 25000 + + def get_discrete_state(state): + #DISCRETE_OBS_SPACE = [20] * len(state) + high_bound = np.array([1] * len(state)) + low_bound = np.array([-1] * len(state)) + obs_win_size = (high_bound-low_bound) / ([20]* len(state)) + discrete_state = np.subtract(state, low_bound)/ obs_win_size + #print(discrete_state.astype(np.float)) + # we use this tuple to look up the 3 Q values for the available actions in the q-table + return tuple(discrete_state.astype(np.int)) + + # execution loop + obs_n = env.reset() + + #a list of q_tables (one q_table for each agent) + + DISCRETE_OBS_SPACE = [20] * len(obs_n[0]) + q_tables = [] + for i in range(env.n): + q_tables.append(np.random.uniform(low=-3, high=3, size=(DISCRETE_OBS_SPACE + [4]))) + q_tables = np.array(q_tables) + #print(q_tables) + + + #for i in range(EPISODE): do the following + obs_n = env.reset() + while True: + # query for action from each agent's policy + act_n = [] + for i, policy in enumerate(policies): + act_n.append(policy.action(obs_n[i])) + new_discrete_state = get_discrete_state(obs_n[i]) + + print(act_n) + #print(obs_n) + # step environment + obs_n, reward_n, done_n, _ = env.step(act_n) + # render all agent views + env.render() + # display rewards + #for agent in env.world.agents: + # print(agent.name + " reward: %0.3f" % env._get_reward(agent)) + + + if True: + for i, policy in enumerate(policies): + #print(q_tables[tuple([0])+(new_discrete_state,)]) + max_future_q = np.max(q_tables[tuple([i])+new_discrete_state]) + current_q = q_tables[tuple([i])+new_discrete_state] + new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward_n[i] + DISCOUNT * max_future_q) + q_tables[tuple([i])+ new_discrete_state+(act_n[i], )] = new_q + + diff --git a/agents_using_gym/gymMountainCarv0/policy.py b/agents_using_gym/gymMountainCarv0/policy.py new file mode 100644 index 000000000..d8b7ef8ce --- /dev/null +++ 
b/agents_using_gym/gymMountainCarv0/policy.py @@ -0,0 +1,88 @@ +import numpy as np +from pyglet.window import key + +from multiagent.scenarios.simple import Scenario + +# individual agent policy +class Policy(object): + def __init__(self): + self.move = [False for i in range(4)] + def action(self, obs): + #agent = env.agents + raise NotImplementedError() + +# interactive policy based on keyboard input +# hard-coded to deal only with movement, not communication +class InteractivePolicy(Policy): + def __init__(self, env, agent_index): + super(InteractivePolicy, self).__init__() + self.env = env + #self.agent_index = agent_index + # hard-coded keyboard events + self.move = [False for i in range(4)] + self.comm = [False for i in range(env.world.dim_c)] + # register keyboard events with this environment's window + env.viewers[agent_index].window.on_key_press = self.key_press + env.viewers[agent_index].window.on_key_release = self.key_release + + def action(self, obs): + # ignore observation and just act based on keyboard events + + + #x_axis = self.env.agents[self.agent_index].state.p_pos[0] + #y_axis = self.env.agents[self.agent_index].state.p_pos[1] + + ''' + If we try to implement Q-learning in Interactive.action(self, obs), + we may first need to have a get_reward() function for each agent. + + Or a simpler way is to have Interactive.action(self, obs) return the action space + each time. Then implement the Q-learning algorithm in bin/interactive.py since interactive.py have access to everything + and it's more convinient to implement. + ''' + + #obs[2] is the x-axis of the relative position between first landmark and the agent + if obs[2] < 0: + self.move[1] = True + elif obs[2] > 0: + self.move[0] = True + else: + self.move[0] = False + self.move[1] = False + + if obs[3] > 0: + self.move[3] = True + elif obs[3] < 0: + self.move[2] = True + else: + self.move[2] = False + self.move[3] = False + + + if self.env.discrete_action_input: + u = 0 + if self.move[0]: u = 1 + if self.move[1]: u = 2 + if self.move[2]: u = 4 + if self.move[3]: u = 3 + else: + u = np.zeros(5) # 5-d because of no-move action + if self.move[0]: u[1] += 1.0 + if self.move[1]: u[2] += 1.0 + if self.move[3]: u[3] += 1.0 + if self.move[2]: u[4] += 1.0 + if True not in self.move: + u[0] += 1.0 + return np.concatenate([u, np.zeros(self.env.world.dim_c)]) + + # keyboard event callbacks + def key_press(self, k, mod): + if k==key.LEFT: self.move[0] = True + if k==key.RIGHT: self.move[1] = True + if k==key.UP: self.move[2] = True + if k==key.DOWN: self.move[3] = True + def key_release(self, k, mod): + if k==key.LEFT: self.move[0] = False + if k==key.RIGHT: self.move[1] = False + if k==key.UP: self.move[2] = False + if k==key.DOWN: self.move[3] = False From df8bf17c7c0acffb5ba4773e1351ce89b97d2840 Mon Sep 17 00:00:00 2001 From: syhdd <45134514+syhdd@users.noreply.github.com> Date: Wed, 24 Jul 2019 13:16:42 -0400 Subject: [PATCH 49/56] Update ScenarioIdeas.md --- ScenarioIdeas.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md index 217c4e80a..83aafb2e0 100644 --- a/ScenarioIdeas.md +++ b/ScenarioIdeas.md @@ -95,3 +95,20 @@ Add defend action, which blocks attack, but opponent agent gains bigger bonus re - Co-op can be implemented where agents spawn with a teammate they cannot attack, and exchange loot with. - Combat can function similarly to D&D involving some RNG +## Idea 4. 
Warship Survival Game +The Idea is extended from the hunger game +#### World +100 x 100 plane where agents try to be the last survivor +Every agent was assigned 5 blocks as ships in the plane. +when every blocks are eliminated, the agents are terminated +The agent has sight within 5 blocks which it can attack +#### Main Attributes: +points: every round each agent is assigned 5 points +Attack: use 2 point to attack a block in the 2-D plane(no range limitation) +Move: use 1 point to move one ship into nearby block +generate new ships:use 4 points put an new ship into plane +#### Rewards +Kills are rewarded(granted points or not) +ships are rewarded(one ship is worth 1 point) +#### Notes +Co-op can be implemented in the way that share sight From 447ac7b6afb25421c10f18fe1b253a4bda6d2bac Mon Sep 17 00:00:00 2001 From: syhdd <45134514+syhdd@users.noreply.github.com> Date: Wed, 24 Jul 2019 13:17:17 -0400 Subject: [PATCH 50/56] Update ScenarioIdeas.md --- ScenarioIdeas.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md index 83aafb2e0..9ff5c1ab4 100644 --- a/ScenarioIdeas.md +++ b/ScenarioIdeas.md @@ -98,17 +98,17 @@ Add defend action, which blocks attack, but opponent agent gains bigger bonus re ## Idea 4. Warship Survival Game The Idea is extended from the hunger game #### World -100 x 100 plane where agents try to be the last survivor -Every agent was assigned 5 blocks as ships in the plane. -when every blocks are eliminated, the agents are terminated -The agent has sight within 5 blocks which it can attack +- 100 x 100 plane where agents try to be the last survivor +- Every agent was assigned 5 blocks as ships in the plane. +- when every blocks are eliminated, the agents are terminated +- The agent has sight within 5 blocks which it can attack #### Main Attributes: -points: every round each agent is assigned 5 points -Attack: use 2 point to attack a block in the 2-D plane(no range limitation) -Move: use 1 point to move one ship into nearby block -generate new ships:use 4 points put an new ship into plane +- points: every round each agent is assigned 5 points +- Attack: use 2 point to attack a block in the 2-D plane(no range limitation) +- Move: use 1 point to move one ship into nearby block +- generate new ships:use 4 points put an new ship into plane #### Rewards -Kills are rewarded(granted points or not) -ships are rewarded(one ship is worth 1 point) +- Kills are rewarded(granted points or not) +- ships are rewarded(one ship is worth 1 point) #### Notes -Co-op can be implemented in the way that share sight +- Co-op can be implemented in the way that share sight From 6ffece211cf0a1092cbb75c98854600792cf6353 Mon Sep 17 00:00:00 2001 From: syhdd <45134514+syhdd@users.noreply.github.com> Date: Wed, 24 Jul 2019 13:20:18 -0400 Subject: [PATCH 51/56] Add new Scenario Idea This Idea is a simplified and extended version from hunger game --- ScenarioIdeas.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md index 9ff5c1ab4..d04cf4759 100644 --- a/ScenarioIdeas.md +++ b/ScenarioIdeas.md @@ -111,4 +111,5 @@ The Idea is extended from the hunger game - Kills are rewarded(granted points or not) - ships are rewarded(one ship is worth 1 point) #### Notes -- Co-op can be implemented in the way that share sight +- Co-op can be implemented in the way that share sight with allay +- Co-op can still attack each other From d01ecf0b6382f92f71999882639027f60fe7bbf6 Mon Sep 17 00:00:00 
2001 From: zrysnd <503591415@qq.com> Date: Wed, 24 Jul 2019 13:33:04 -0400 Subject: [PATCH 52/56] reward based on cheat/cooperate --- multiagent/scenarios/race.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/multiagent/scenarios/race.py b/multiagent/scenarios/race.py index f4a21b57f..2db68db46 100644 --- a/multiagent/scenarios/race.py +++ b/multiagent/scenarios/race.py @@ -47,9 +47,20 @@ def reset_world(self, world): landmark.state.p_vel = np.zeros(world.dim_p) def reward(self, agent, world): - #dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos)) - dist2 = np.sum(np.square(agent.state.p_pos - np.array([agent.state.p_pos[0], 5.0]))) - return -dist2 + # dist2 = np.sum(np.square(agent.state.p_pos - np.array([agent.state.p_pos[0], 5.0]))) + agentCheated = False + theOtherAgentCheated = False + if agentCheated and theOtherAgentCheated: + return 1 + if agentCheated and !theOtherAgentCheated: + return 5 + if !agentCheated and theOtherAgentCheated: + return -3 + else: + return 3 + # if !agentCheated and !theOtherAgentCheated: + # return 3 + # return -dist2 def observation(self, agent, world): # get positions of all entities in this agent's reference frame From 3e8fc138227c8f21689a4642da9e7dda8456c619 Mon Sep 17 00:00:00 2001 From: Jarbus Date: Wed, 24 Jul 2019 19:28:29 -0400 Subject: [PATCH 53/56] update readme --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index aa9bd3e8a..d07408ccc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ -**Status:** Archive (code is provided as-is, no updates expected) +**Status:** Code in active development for a class project at the Rensselaer Polytechnic Institute + +- Most team communication done via mattermost at chat.rcos.io # Multi-Agent Particle Environment From 250d49aa810c9244e3756b34a470577c8f882aa4 Mon Sep 17 00:00:00 2001 From: jarbus Date: Thu, 25 Jul 2019 22:05:58 -0400 Subject: [PATCH 54/56] clean up for pull --- README.md | 4 +- ScenarioIdeas.md | 115 -------------- agents_using_gym/gymMountainCarv0/README.md | 12 -- agents_using_gym/gymMountainCarv0/cheating.py | 13 -- .../gymMountainCarv0/interactive_tmp.py | 84 ---------- agents_using_gym/gymMountainCarv0/policy.py | 88 ----------- .../gymMountainCarv0/simpleqlearning.py | 62 -------- changes.txt | 23 --- documentation.md | 77 ---------- multiagent/scenarios/cus.py | 64 -------- multiagent/scenarios/race.py | 70 --------- multiagent/scenarios/testing.py | 145 ------------------ 12 files changed, 1 insertion(+), 756 deletions(-) delete mode 100644 ScenarioIdeas.md delete mode 100644 agents_using_gym/gymMountainCarv0/README.md delete mode 100644 agents_using_gym/gymMountainCarv0/cheating.py delete mode 100644 agents_using_gym/gymMountainCarv0/interactive_tmp.py delete mode 100644 agents_using_gym/gymMountainCarv0/policy.py delete mode 100644 agents_using_gym/gymMountainCarv0/simpleqlearning.py delete mode 100644 changes.txt delete mode 100644 documentation.md delete mode 100644 multiagent/scenarios/cus.py delete mode 100644 multiagent/scenarios/race.py delete mode 100644 multiagent/scenarios/testing.py diff --git a/README.md b/README.md index d07408ccc..aa9bd3e8a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ -**Status:** Code in active development for a class project at the Rensselaer Polytechnic Institute - -- Most team communication done via mattermost at chat.rcos.io +**Status:** Archive (code is provided as-is, no updates expected) # Multi-Agent Particle 
Environment diff --git a/ScenarioIdeas.md b/ScenarioIdeas.md deleted file mode 100644 index d04cf4759..000000000 --- a/ScenarioIdeas.md +++ /dev/null @@ -1,115 +0,0 @@ -# Idea table: - -Generated at: https://www.tablesgenerator.com/markdown_tables - -| | Possible Actions | Rewards per Outcome | Properties of other entities | Nash Equilibrium | Other Notes | -| ---- | ----------------------- | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------- | ------------------------- | ------------- | -| #1 | Expand, attack, trade | Expanding + attacking spends resources for greater resource bonuses later. | No other entities other than agents | Attack | | -| | | Trading gives bonus resources for both agents | | | | -| #2 | Move x steps | Reward = progress in last step | No other entities other than agents | Move as far as possible | | -| #3 | move, attack, loot, rest | Staying alive, get gear, kills, winning | No other entities other than agents | live and loot | | -| #4 | | | | | | -| #5 | | | | | | - -# Details: -## Idea 1. (Risk but on a grid) -Grid based cell game, each agent starts with 1 cell on some part of the grid. Agents use resources to expand, attack, or trade with neighboring cells. Every turn agents gain a set amount of resources based on area of agent's cells. For every neighboring cell, if it is not occupied, the agent can choose to spend resources to expand into the area, or not. If the cell is occupied, the agent can choose to attack, or trade. Attacking allows for the takeover of the cell and requires the agent to spend resources. Trading requires the agent to give resources to the other agent, but if both agents decide to trade, they can recieve some bonus based on who gave more resources. If one agent attacks, and the other trades, the attacker automatically wins. If both attack, the agent that spent more resources to attack wins. Resource costs and bonuses can be tweaked to ensure fairness and balance. -### Examples: -Agent A and Agent B are neighbors: if A trades 2 resources, and B trades 4 resources, A could gain 4(from B) + 2(bonus includes how much given) + 1(some multiplier of how much was given in this case 0.5 for giving less) resulting in net +5, B would gain 2(from A) + 4(given) + 4(multiplier bonus of 1 for giving more) resulting in +6 \ -If A attacks B, spending 5 resources; B attempts to trade 4 resources, A takes over some area of B and gains 4 resources from B's trade with net gain of -1 resource and + some area; B has a net gain of -4 resources and -some area. \ -If A attacks B, spending 5 resources; B attacks A spending 6 resources, B takes some area of A. A has a net gain of -5 resources and -some area; B has a net gain of -5 resources and -some area. - -### Possible expansion: -Add defend action, which blocks attack, but opponent agent gains bigger bonus resource if they try to trade. - -## Idea 2. Race - -#### World -- 2D plane where agents try to race to their landmark. -- Agents only move forward/backward parallel to each other -- All agents start from the same location - -#### Rules -- Agents can take any x number of steps to advance to the landmark. -- If the sum of all the steps taken by the agents(y) exceeds z, then all agents that moved get moved backwards w steps. -- Agents are rewarded based off of how many steps they are successfully able to take per turn. 
- - Say each agent tries to take 10 steps, but because too many agents are trying to move in a turn, they all get moved back 5 steps. In this case, their reward would be -5, even though they tried to take 10 steps. - -#### Variables -- Parameters y, z, and the initial distance for each agent to the landmark can be varied for balance and to compare agent behavior. - -#### Notes -- The landmarks don't actually get taken into account for the rewards or observation, it's simply aiding visualization of how much progess each agent is able to make. -- Agents will either have to be moved by the scenario via physics, or they can move based off of the reward recieved on their next action, the following turn. - -## Idea 3. Hunger Games - -#### World -- 10 x 10 plane where agents try to be the last survivor -- 12 agents start equidistant from each other in a circle - - Middle of circle is high tier loot -- Set loot spawns with a set tier, but random loot -- Structures agents can enter and be hidden from sight - -#### Agent -- Main Attributes - - Attack Range - - Attack Power - - Def - - HP - - Stamina -- Choices: - - Loot - - Sight limited - - Attack an adjacent agent - - Additional attack options possible with certain loot - - Uses Stamina - - Move - - Walk one space - - Run 2x fast w/ Stamina - - Rest - - Recover HP/Stamina -#### Rewards -- Kills are not intrinsically rewarded -- Looting from chests/bodies result in a set reward value per tier/killcount, and additional reward from net stat gain -- Time alive gives slight reward with each tick -- Winning gives the highest reward - -#### Agent Variables -- Environment Knowledge - - Excludes: - Chest loot status - Chest loot items - Alive/Dead Enemy location -- Sight -- Self position -- Attributes -- Loot -- Kill count -- Kill counts of other agents -- List of Alive Agents -- List of Dead Agents -- Attributes of agents in Sight - -#### Notes -- Co-op can be implemented where agents spawn with a teammate they cannot attack, and exchange loot with. -- Combat can function similarly to D&D involving some RNG - -## Idea 4. Warship Survival Game -The Idea is extended from the hunger game -#### World -- 100 x 100 plane where agents try to be the last survivor -- Every agent was assigned 5 blocks as ships in the plane. -- when every blocks are eliminated, the agents are terminated -- The agent has sight within 5 blocks which it can attack -#### Main Attributes: -- points: every round each agent is assigned 5 points -- Attack: use 2 point to attack a block in the 2-D plane(no range limitation) -- Move: use 1 point to move one ship into nearby block -- generate new ships:use 4 points put an new ship into plane -#### Rewards -- Kills are rewarded(granted points or not) -- ships are rewarded(one ship is worth 1 point) -#### Notes -- Co-op can be implemented in the way that share sight with allay -- Co-op can still attack each other diff --git a/agents_using_gym/gymMountainCarv0/README.md b/agents_using_gym/gymMountainCarv0/README.md deleted file mode 100644 index 8a2475783..000000000 --- a/agents_using_gym/gymMountainCarv0/README.md +++ /dev/null @@ -1,12 +0,0 @@ -## This folders incude some agents for gym's mountain car environment. -## The codes in this folder are using Python 3.6.1, gym==0.13.1,numpy==1.16.4. The codes are using some functions from gym==0.13.1 which are not implemented in gym==0.10.5, so please upgrade your gym before running these codes. 
-### If you don't know how to upgrade gym: -``` -pip uninstall gym -pip install gym -``` -## Python files -### These files are just using gym, and can be run by ```python filename.py``` (or ```python3 filename.py``` if you are using linux.) IDEs shold be able to run them as well. -### cheating.py is a straight solution by Mark Yu after 2 seconds of thinking, it represents Mark's superiority against AI. JK. -### simpleqlearning.py is an implementation of qlearning, an algorithm that Mark learnt from wikipedia [https://en.wikipedia.org/wiki/Q-learning](https://en.wikipedia.org/wiki/Q-learning). Feel free to mess with the learning rate and discountrate in the code and compare the time it takes for the AI to learn how to push the car to the summit. - diff --git a/agents_using_gym/gymMountainCarv0/cheating.py b/agents_using_gym/gymMountainCarv0/cheating.py deleted file mode 100644 index c67572d6c..000000000 --- a/agents_using_gym/gymMountainCarv0/cheating.py +++ /dev/null @@ -1,13 +0,0 @@ -import gym -env = gym.make("MountainCar-v0") - -done=False -state=env.reset() -while not done: - if state[1]<=0: - state, reward, done,info = env.step(0) - else: - state, reward, done,info = env.step(2) - env.render() - -env.close() \ No newline at end of file diff --git a/agents_using_gym/gymMountainCarv0/interactive_tmp.py b/agents_using_gym/gymMountainCarv0/interactive_tmp.py deleted file mode 100644 index 858378dec..000000000 --- a/agents_using_gym/gymMountainCarv0/interactive_tmp.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python -import os,sys -sys.path.insert(1, os.path.join(sys.path[0], '..')) -import argparse - -from multiagent.environment import MultiAgentEnv -from multiagent.policy import InteractivePolicy -import multiagent.scenarios as scenarios - -import numpy as np - -if __name__ == '__main__': - # parse arguments - parser = argparse.ArgumentParser(description=None) - parser.add_argument('-s', '--scenario', default='simple.py', help='Path of the scenario Python script.') - args = parser.parse_args() - - # load scenario from script - scenario = scenarios.load(args.scenario).Scenario() - # create world - world = scenario.make_world() - # create multiagent environment - env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation, info_callback=None, shared_viewer = False) - # render call to create viewer window (necessary only for interactive policies) - env.render() - # create interactive policies for each agent - policies = [InteractivePolicy(env,i) for i in range(env.n)] - - LEARNING_RATE = 0.1 - DISCOUNT = 0.95 - EPISODE = 25000 - - def get_discrete_state(state): - #DISCRETE_OBS_SPACE = [20] * len(state) - high_bound = np.array([1] * len(state)) - low_bound = np.array([-1] * len(state)) - obs_win_size = (high_bound-low_bound) / ([20]* len(state)) - discrete_state = np.subtract(state, low_bound)/ obs_win_size - #print(discrete_state.astype(np.float)) - # we use this tuple to look up the 3 Q values for the available actions in the q-table - return tuple(discrete_state.astype(np.int)) - - # execution loop - obs_n = env.reset() - - #a list of q_tables (one q_table for each agent) - - DISCRETE_OBS_SPACE = [20] * len(obs_n[0]) - q_tables = [] - for i in range(env.n): - q_tables.append(np.random.uniform(low=-3, high=3, size=(DISCRETE_OBS_SPACE + [4]))) - q_tables = np.array(q_tables) - #print(q_tables) - - - #for i in range(EPISODE): do the following - obs_n = env.reset() - while True: - # query for action from each agent's policy - act_n = [] - for i, policy in 
enumerate(policies): - act_n.append(policy.action(obs_n[i])) - new_discrete_state = get_discrete_state(obs_n[i]) - - print(act_n) - #print(obs_n) - # step environment - obs_n, reward_n, done_n, _ = env.step(act_n) - # render all agent views - env.render() - # display rewards - #for agent in env.world.agents: - # print(agent.name + " reward: %0.3f" % env._get_reward(agent)) - - - if True: - for i, policy in enumerate(policies): - #print(q_tables[tuple([0])+(new_discrete_state,)]) - max_future_q = np.max(q_tables[tuple([i])+new_discrete_state]) - current_q = q_tables[tuple([i])+new_discrete_state] - new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward_n[i] + DISCOUNT * max_future_q) - q_tables[tuple([i])+ new_discrete_state+(act_n[i], )] = new_q - - diff --git a/agents_using_gym/gymMountainCarv0/policy.py b/agents_using_gym/gymMountainCarv0/policy.py deleted file mode 100644 index d8b7ef8ce..000000000 --- a/agents_using_gym/gymMountainCarv0/policy.py +++ /dev/null @@ -1,88 +0,0 @@ -import numpy as np -from pyglet.window import key - -from multiagent.scenarios.simple import Scenario - -# individual agent policy -class Policy(object): - def __init__(self): - self.move = [False for i in range(4)] - def action(self, obs): - #agent = env.agents - raise NotImplementedError() - -# interactive policy based on keyboard input -# hard-coded to deal only with movement, not communication -class InteractivePolicy(Policy): - def __init__(self, env, agent_index): - super(InteractivePolicy, self).__init__() - self.env = env - #self.agent_index = agent_index - # hard-coded keyboard events - self.move = [False for i in range(4)] - self.comm = [False for i in range(env.world.dim_c)] - # register keyboard events with this environment's window - env.viewers[agent_index].window.on_key_press = self.key_press - env.viewers[agent_index].window.on_key_release = self.key_release - - def action(self, obs): - # ignore observation and just act based on keyboard events - - - #x_axis = self.env.agents[self.agent_index].state.p_pos[0] - #y_axis = self.env.agents[self.agent_index].state.p_pos[1] - - ''' - If we try to implement Q-learning in Interactive.action(self, obs), - we may first need to have a get_reward() function for each agent. - - Or a simpler way is to have Interactive.action(self, obs) return the action space - each time. Then implement the Q-learning algorithm in bin/interactive.py since interactive.py have access to everything - and it's more convinient to implement. 
- ''' - - #obs[2] is the x-axis of the relative position between first landmark and the agent - if obs[2] < 0: - self.move[1] = True - elif obs[2] > 0: - self.move[0] = True - else: - self.move[0] = False - self.move[1] = False - - if obs[3] > 0: - self.move[3] = True - elif obs[3] < 0: - self.move[2] = True - else: - self.move[2] = False - self.move[3] = False - - - if self.env.discrete_action_input: - u = 0 - if self.move[0]: u = 1 - if self.move[1]: u = 2 - if self.move[2]: u = 4 - if self.move[3]: u = 3 - else: - u = np.zeros(5) # 5-d because of no-move action - if self.move[0]: u[1] += 1.0 - if self.move[1]: u[2] += 1.0 - if self.move[3]: u[3] += 1.0 - if self.move[2]: u[4] += 1.0 - if True not in self.move: - u[0] += 1.0 - return np.concatenate([u, np.zeros(self.env.world.dim_c)]) - - # keyboard event callbacks - def key_press(self, k, mod): - if k==key.LEFT: self.move[0] = True - if k==key.RIGHT: self.move[1] = True - if k==key.UP: self.move[2] = True - if k==key.DOWN: self.move[3] = True - def key_release(self, k, mod): - if k==key.LEFT: self.move[0] = False - if k==key.RIGHT: self.move[1] = False - if k==key.UP: self.move[2] = False - if k==key.DOWN: self.move[3] = False diff --git a/agents_using_gym/gymMountainCarv0/simpleqlearning.py b/agents_using_gym/gymMountainCarv0/simpleqlearning.py deleted file mode 100644 index 3e8ccd7d1..000000000 --- a/agents_using_gym/gymMountainCarv0/simpleqlearning.py +++ /dev/null @@ -1,62 +0,0 @@ -import gym -import numpy - -env = gym.make("MountainCar-v0") - -learningrate = 0.7 -discount = 0.90 -#initiallize the Q table [40,40,3] with random values. The meaning of the q table is the q value of a set of [state of positions,state of velocity, action you take]. -#Note that the game is continous but the states of our q table are discrete(since we can only deal with finite states), So I also need a getstate function to turn the continous states into deiscrete states. -#all q values are initialized between -2 and 0 because the reward is always -1 in the mountaincar game. -q_table = numpy.random.uniform(-2, 0, [40,40,3]) - - -def getstate(state): - discrete_state = (state - env.observation_space.low)/((env.observation_space.high-env.observation_space.low)/[40,40]) - return tuple(discrete_state.astype(numpy.int)) # we use this tuple to look up the 3 Q values for the available actions in the q-table - - -for episode in range(2700): - currentstate = getstate(env.reset()) - done = False - #render every 300 episodes to save time. 
- if episode % 300 == 0: - render = True - print(episode) - else: - render = False - - while not done: - action = numpy.argmax(q_table[currentstate]) - new_state, reward, done,info = env.step(action) - #nextstate is the discrete mapping from the new state to the q table - nextstate = getstate(new_state) - - if render: - env.render() - - # Update Q table - if not done: - # Maximum possible Q value in next step (for new state) - maxnextq = numpy.max(q_table[nextstate]) - # Current Q value (for current state and performed action) - current_q = q_table[currentstate + (action,)] - # the qlearning function - new_q = (1 - learningrate) * current_q + learningrate * (reward + discount * maxnextq) - # Update Q table with new Q value - q_table[currentstate + (action,)] = new_q - - - # Simulation ended (for any reson) - if goal position is achived - update Q value with reward directly - elif new_state[0] >= 0.5: - print("We make it!") - print(episode) - q_table[currentstate + (action,)] = 0 - - - currentstate = nextstate - - - - -env.close() \ No newline at end of file diff --git a/changes.txt b/changes.txt deleted file mode 100644 index 95529c034..000000000 --- a/changes.txt +++ /dev/null @@ -1,23 +0,0 @@ -multi_discrete.py: -- [FIXED] Changed random_array assignment in sample() to use gym.utils.seeding - -rendering.py: -- [FIXED] The reraise function appears to no longer exist. Fortunately, all this does is add additional information to a raised - exception, so should be simple to reimplement (or ignore) - -environment.py: -- [FIXED] line 234: `geom.set_color(*entity.color, alpha=0.5)` - receives multiple arguments. This is a pain, because it is - expanding the color argument, a 3-tuple or 4-tuple, but we want to set alpha to 0.5. A dumb fix is to make a new - tuple with the first three arguments of the color, and 0.5 for alpha. - -simple_crypto.py: -- line 121: array in conditional can potentially be a boolean rather than an ndarray so it will have no .all() method - - Similar statements appear on lines 104, 109 -- line 122: the sizes in the expression `agent.state.c - agent.goal_a.color` are mismatched and it is unclear where they - come from - - Similar statements appear on lines 107, 112 -- [Austen] I got it to run by removing the .all() calls and changing lines like - np.sum(np.square(agent.state.c - agent.goal_a.color)) - to - np.square(len(agent.state.c)-len(agent.goal_a.color)) - Not sure if scenario still functions correctly / if reward calculations are accurate diff --git a/documentation.md b/documentation.md deleted file mode 100644 index f7e697ccf..000000000 --- a/documentation.md +++ /dev/null @@ -1,77 +0,0 @@ -# Execution: - -In a simulation with `n` agents: - -1. bin/script.py loads - acts as main script -2. Loads scenario - - `./multiagent/scenario.py.make_world()` -3. Loads multi-agent enviroment given scenario settings and world - - `./multiagent/environment.py.MultiAgentEnv(Scenario.world())` -4. Renders environment (initial render) - - `./multiagent/environment.py.render()` -5. Assigns policies (algorithms) for each agent - - stored as policies[] list - - policy[agent_index] = ./multiagent/policies/template.py.TemplatePolicy(env,agent_index) - - Note: Template not implemented yet, see `./multiagent/policy.py.InteractivePolicy()` for now - - For more information, see [Policies](#POLICIES) -6. Resets environment -7. Infinite while loop - 1. Makes a list of actions, one action per policy - - actions[i] - 2. 
Performs one environment step using entire action list - - `multiagent/environment.py.step()` returns: - - n observations - - n rewards - - n done states - - n debug objects - 3. Re-render - - `multiagent/environment.py.render()` - -## Environment - -The main class in use during execution. The environment interacts with the scenario and the agents. There is one environment that all scenarios use. Each scenario implements reward() and observation() which the environment calls. - -- `./make_env.py`: contains code for importing a multiagent environment as an OpenAI Gym-like object. - -- `./multiagent/environment.py`: contains code for environment simulation (interaction physics, `_step()` function, etc.) - -## Policy - -A policy seems to be a system to control an agent. The interactive policy allows control of an agent with keyboard and mouse, but if we wish to implement algorithms we will most likely be implementing them as a policy. **NOTE: Policies are enumerable** - -- `./multiagent/policy.py`: contains code for interactive policy based on keyboard input. - -A Policy has two functions: - -- `__init__()` passes the environment to the policy class -- `action(obs)` performs an action given an observation - - -## Scenarios - -A BaseScenario `multiagent/scenario.py` incorporates at least `make_world()` and `reset_world()`. An implemented Scenario will incorporate reward() and observation(). All scenario calls are made through the environment. - -- `./multiagent/scenario.py`: contains base scenario object that is extended for all scenarios. - -- `./multiagent/scenarios/`: folder where various scenarios/ environments are stored. scenario code consists of several functions: - 1) `make_world()`: creates all of the entities that inhabit the world (landmarks, agents, etc.), assigns their capabilities (whether they can communicate, or move, or both). - called once at the beginning of each training session - 2) `reset_world()`: resets the world by assigning properties (position, color, etc.) to all entities in the world - called before every episode (including after make_world() before the first episode) - 3) `reward(agent,world)`: defines the reward function for a given agent - 4) `observation(agent, world)`: defines the observation space of a given agent - 5) (optional) `benchmark_data()`: provides diagnostic data for policies trained on the environment (e.g. evaluation metrics) - -You can create new scenarios by implementing the first 4 functions above (`make_world()`, `reset_world()`, `reward()`, and `observation()`), and have to keep the same function signature(can't not change parameters), unless we all make changes to multiagent/environment. - -## Miscellaneous - -- `./multiagent/core.py`: contains classes for various objects (Entities, Landmarks, Agents, etc.) that are used throughout the code.(used for creating a scenario. We might need customized entities, agents for our own scenarios.) - -- `./multiagent/rendering.py`: used for displaying agent behaviors on the screen. - -## Visualization: - -1. Each agent will have one corresponding window generated for itself, agents always locate at the center of the camera in its own wondow. -2. In the interactive policy, pressing -> will make the agent go left in the world, but everything else goes right in its own window(since it's always at the center of its own window). 
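As a quick reference for the Scenarios section above, here is a minimal scenario sketch built only from the attribute and function names that already appear in `multiagent/scenarios/cus.py` and `race.py` below; the concrete positions, colors, and the single-landmark reward are illustrative choices, not an existing file in the repository.

```python
import numpy as np
from multiagent.core import World, Agent, Landmark
from multiagent.scenario import BaseScenario

class Scenario(BaseScenario):
    def make_world(self):
        # called once at the beginning of a training session
        world = World()
        world.agents = [Agent() for _ in range(1)]
        world.landmarks = [Landmark() for _ in range(1)]
        for i, agent in enumerate(world.agents):
            agent.name = 'agent %d' % i
            agent.collide = False
            agent.silent = True
        for i, landmark in enumerate(world.landmarks):
            landmark.name = 'landmark %d' % i
            landmark.collide = False
            landmark.movable = False
        self.reset_world(world)
        return world

    def reset_world(self, world):
        # called before every episode; fixed (non-random) initial state for simplicity
        for agent in world.agents:
            agent.color = np.array([0.25, 0.25, 0.25])
            agent.state.p_pos = np.zeros(world.dim_p)
            agent.state.p_vel = np.zeros(world.dim_p)
            agent.state.c = np.zeros(world.dim_c)
        for landmark in world.landmarks:
            landmark.color = np.array([0.75, 0.25, 0.25])
            landmark.state.p_pos = np.array([0.0, 0.5])
            landmark.state.p_vel = np.zeros(world.dim_p)

    def reward(self, agent, world):
        # negative euclidean distance to the single landmark
        delta_pos = agent.state.p_pos - world.landmarks[0].state.p_pos
        return -np.sqrt(np.sum(np.square(delta_pos)))

    def observation(self, agent, world):
        # own velocity plus landmark positions in the agent's reference frame
        entity_pos = [lm.state.p_pos - agent.state.p_pos for lm in world.landmarks]
        return np.concatenate([agent.state.p_vel] + entity_pos)
```

These four methods are exactly the callbacks that `MultiAgentEnv` receives in step 3 of the execution flow, so a new file dropped into `multiagent/scenarios/` needs nothing beyond them (plus an optional `benchmark_data()`).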
- diff --git a/multiagent/scenarios/cus.py b/multiagent/scenarios/cus.py deleted file mode 100644 index c32da49e2..000000000 --- a/multiagent/scenarios/cus.py +++ /dev/null @@ -1,64 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def __init__(self): - super(Scenario, self).__init__() - self.agentsToLandMarks = {} - - def make_world(self): - world = World() - # add agents - numberOfAgents = 2; - world.agents = [Agent() for i in range(numberOfAgents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.silent = True - # add landmarks - world.landmarks = [Landmark() for i in range(numberOfAgents)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - #fill in the dictionary - for i in range(numberOfAgents): - self.agentsToLandMarks.update({ world.agents[i]: world.landmarks[i] }) - - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25,0.25,0.25]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.75,0.25,0.25]) - world.landmarks[0].color = np.array([0.75,0.25,0.25]) - # set random initial states - for i,agent in enumerate(world.agents): - # agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - agent.state.p_pos = np.array([i/2,0]) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - # landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - landmark.state.p_pos = np.array([i/2,0.75]) - landmark.state.p_vel = np.zeros(world.dim_p) - - def reward(self, agent, world): - # dist2 = np.sum(np.square(agent.state.p_pos - world.landmarks[0].state.p_pos)) - # dist2 = world.landmarks[0].state.p_pos - delta_pos = agent.state.p_pos - self.agentsToLandMarks[agent].state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - return -dist - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - return np.concatenate([agent.state.p_vel] + entity_pos) diff --git a/multiagent/scenarios/race.py b/multiagent/scenarios/race.py deleted file mode 100644 index 2db68db46..000000000 --- a/multiagent/scenarios/race.py +++ /dev/null @@ -1,70 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - -class Scenario(BaseScenario): - def make_world(self): - world = World() - # add agents - world.agents = [Agent() for i in range(2)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.silent = True - # add landmarks - world.landmarks = [Landmark() for i in range(2)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for agents - for i, agent in enumerate(world.agents): - agent.color = np.array([0.25,0.25,0.25]) - # random properties for landmarks - for i, landmark in 
enumerate(world.landmarks): - landmark.color = np.array([0.75,0.75,0.75]) - world.landmarks[0].color = np.array([0.75,0.25,0.25]) - world.landmarks[1].color = np.array([0.75,0.25,0.25]) - # set random initial states - #for agent in world.agents: - #agent.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - world.agents[0].state.p_pos = np.array([0.0,0.0]) - world.agents[0].state.p_vel = np.zeros(world.dim_p) - world.agents[0].state.c = np.zeros(world.dim_c) - - world.agents[1].state.p_pos = np.array([0.5,0.0]) - world.agents[1].state.p_vel = np.zeros(world.dim_p) - world.agents[1].state.c = np.zeros(world.dim_c) - - for i, landmark in enumerate(world.landmarks): - #landmark.state.p_pos = np.random.uniform(-1,+1, world.dim_p) - landmark.state.p_pos = np.array([0.0 + i*0.5, 5.0]) - landmark.state.p_vel = np.zeros(world.dim_p) - - def reward(self, agent, world): - # dist2 = np.sum(np.square(agent.state.p_pos - np.array([agent.state.p_pos[0], 5.0]))) - agentCheated = False - theOtherAgentCheated = False - if agentCheated and theOtherAgentCheated: - return 1 - if agentCheated and !theOtherAgentCheated: - return 5 - if !agentCheated and theOtherAgentCheated: - return -3 - else: - return 3 - # if !agentCheated and !theOtherAgentCheated: - # return 3 - # return -dist2 - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - return np.concatenate([agent.state.p_vel] + entity_pos) diff --git a/multiagent/scenarios/testing.py b/multiagent/scenarios/testing.py deleted file mode 100644 index 51c2d4f20..000000000 --- a/multiagent/scenarios/testing.py +++ /dev/null @@ -1,145 +0,0 @@ -import numpy as np -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class Scenario(BaseScenario): - goalDist = 5.0; #Currently the distance to landmark - - def make_world(self): - world = World() #World has agents and landmarks - # set any world properties first - world.dim_c = 0 - num_agents = 2 #Change this to add agents - world.num_agents = num_agents - num_adversaries = 0 - num_landmarks = num_agents - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = False - agent.silent = True - agent.adversary = True if i < num_adversaries else False - agent.size = 0.15 - # add landmarks - world.landmarks = [Landmark() for i in range(num_landmarks)] - for i, landmark in enumerate(world.landmarks): - landmark.name = 'landmark %d' % i - landmark.collide = False - landmark.movable = False - landmark.size = 0.08 - # make initial conditions - self.reset_world(world) - return world - - def reset_world(self, world): - # random properties for agents - world.agents[0].color = np.array([0.85, 0.35, 0.35]) - for i in range(1, world.num_agents): - world.agents[i].color = np.array([0.35, 0.35, 0.85]) - # random properties for landmarks - for i, landmark in enumerate(world.landmarks): - landmark.color = np.array([0.15, 0.15, 0.15]) - for agent in world.agents: - agent.goal_a = goal - # set random initial states TODO: Initialize agents + landmarks to set positions with 0 velocity - for i, agent in enumerate(world.agents): - agent.state.p_pos = np.array([i/2,0]) - agent.state.p_vel = 0 - agent.state.c = np.zeros(world.dim_c) - for i, landmark in enumerate(world.landmarks): - landmark.state.p_pos = np.array([i,goalDist]) - 
landmark.state.p_vel = 0 - - def benchmark_data(self, agent, world): - # returns data for benchmarking purposes - if agent.adversary: - return np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) - else: - dists = [] - for l in world.landmarks: - dists.append(np.sum(np.square(agent.state.p_pos - l.state.p_pos))) - dists.append(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) - return tuple(dists) - - # return all agents that are not adversaries - def good_agents(self, world): - return [agent for agent in world.agents if not agent.adversary] - - # # return all adversarial agents - # def adversaries(self, world): - # return [agent for agent in world.agents if agent.adversary] - - #Simplified to just distance from y = 5; - def reward(self, agent, world): - alpha = 0.5 #Can be adjusted to determine whether individual performance, or ranked importance is more important [0,1] - return alpha * agent.state.p_pos[1] - (1-alpha) * 1/(world.num_agents-1)*sum([other.state.p_pos[1] for other in world.agents if other is not agent]) - #Right now + for distance - average of the distance covered by other agents. - - - # return self.agent_reward(agent,world) - # Agents are rewarded based on minimum agent distance to each landmark - # return self.adversary_reward(agent, world) if agent.adversary else self.agent_reward(agent, world) - - # def agent_reward(self, agent, world): #TODO: set reward to distance to their landmark, remove adversary stuff - # # Rewarded based on how close any good agent is to the goal landmark, and how far the adversary is from it - # shaped_reward = True - # shaped_adv_reward = True - - # # # Calculate negative reward for adversary - # # adversary_agents = self.adversaries(world) - # # if shaped_adv_reward: # distance-based adversary reward - # # adv_rew = sum([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in adversary_agents]) - # # else: # proximity-based adversary reward (binary) - # # adv_rew = 0 - # # for a in adversary_agents: - # # if np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) < 2 * a.goal_a.size: - # # adv_rew -= 5 - - # # Calculate positive reward for agents - # good_agents = self.good_agents(world) - # if shaped_reward: # distance-based agent reward - # pos_rew = -min( - # [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) - # else: # proximity-based agent reward (binary) - # pos_rew = 0 - # if min([np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) \ - # < 2 * agent.goal_a.size: - # pos_rew += 5 - # pos_rew -= min( - # [np.sqrt(np.sum(np.square(a.state.p_pos - a.goal_a.state.p_pos))) for a in good_agents]) - # return pos_rew + adv_rew #Rewards are a simple int - - #Adversaries are given rewards - # def adversary_reward(self, agent, world): - # # Rewarded based on proximity to the goal landmark - # shaped_reward = True - # if shaped_reward: # distance-based reward - # return -np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos)) - # else: # proximity-based reward (binary) - # adv_rew = 0 - # if np.sqrt(np.sum(np.square(agent.state.p_pos - agent.goal_a.state.p_pos))) < 2 * agent.goal_a.size: - # adv_rew += 5 - # return adv_rew - - - #What is passed to the agent ie How they see the world - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - # entity_pos = [] - # for entity in world.landmarks: - # entity_pos.append(entity.state.p_pos - agent.state.p_pos) - entity_pos = 
[goalDist - agent.state.p_pos[1]] #Should only need the distance to it's own landmark goal - - - # communication of all other Agents - other_pos = [] - for other in world.agents: - # if other is agent: continue - other_pos.append(goalDist - other.state.p_pos[1]) #Agents know how far other agents are from their goals - - if not agent.adversary: - return np.concatenate([agent.goal_a.state.p_pos - agent.state.p_pos] + entity_pos + other_pos) - else: - return np.concatenate(entity_pos + other_pos) From b82e19b2806f3396a6f9c769446ca6ef71f99917 Mon Sep 17 00:00:00 2001 From: jarbus Date: Thu, 25 Jul 2019 22:12:49 -0400 Subject: [PATCH 55/56] push cleanup --- multiagent/policy.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/multiagent/policy.py b/multiagent/policy.py index ff2e8997a..fd94e6ac5 100644 --- a/multiagent/policy.py +++ b/multiagent/policy.py @@ -26,28 +26,6 @@ def __init__(self, env, agent_index): def action(self, obs): # ignore observation and just act based on keyboard events - - - #x_axis = self.env.agents[self.agent_index].state.p_pos[0] - #y_axis = self.env.agents[self.agent_index].state.p_pos[1] - - # if obs[2] < 0: - # self.move[1] = True - # elif obs[2] > 0: - # self.move[0] = True - # else: - # self.move[0] = False - # self.move[1] = False - - # if obs[3] > 0: - # self.move[3] = True - # elif obs[3] < 0: - # self.move[2] = True - # else: - # self.move[2] = False - # self.move[3] = False - - if self.env.discrete_action_input: u = 0 if self.move[0]: u = 1 From 6ec57e79add813d36b345050d9512a87250d763b Mon Sep 17 00:00:00 2001 From: jarbus Date: Thu, 25 Jul 2019 22:17:33 -0400 Subject: [PATCH 56/56] push cleanup --- multiagent/policy.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/multiagent/policy.py b/multiagent/policy.py index fd94e6ac5..9c9cc783b 100644 --- a/multiagent/policy.py +++ b/multiagent/policy.py @@ -34,12 +34,12 @@ def action(self, obs): if self.move[3]: u = 3 else: u = np.zeros(5) # 5-d because of no-move action - if self.move[0]: u[1] += 0.01 - if self.move[1]: u[2] += 0.01 - if self.move[3]: u[3] += 0.01 - if self.move[2]: u[4] += 0.01 + if self.move[0]: u[1] += 1.0 + if self.move[1]: u[2] += 1.0 + if self.move[3]: u[3] += 1.0 + if self.move[2]: u[4] += 1.0 if True not in self.move: - u[0] += 0.01 + u[0] += 1.0 return np.concatenate([u, np.zeros(self.env.world.dim_c)]) # keyboard event callbacks
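For reference, the update behind `simpleqlearning.py` and the Q-table experiment in `interactive_tmp.py` (both removed in the cleanup above) is the standard tabular Q-learning step from the Wikipedia article cited in that folder's README. Below is a minimal, self-contained sketch: the learning rate, discount, and `[40, 40, 3]` table shape are the values from `simpleqlearning.py`, while the `q_update()` helper and the toy transition at the end are made up purely for illustration.

```python
import numpy as np

learningrate = 0.7
discount = 0.90
# Q-table indexed by [position bin, velocity bin, action]; values start in [-2, 0)
# because every per-step reward in MountainCar is -1.
q_table = np.random.uniform(-2, 0, size=(40, 40, 3))

def q_update(q_table, state, action, reward, next_state):
    """One in-place Q-learning step; `state`/`next_state` are discretised index tuples."""
    maxnextq = np.max(q_table[next_state])          # best value reachable from the next state
    current_q = q_table[state + (action,)]          # Q(s, a) before the update
    q_table[state + (action,)] = (1 - learningrate) * current_q \
        + learningrate * (reward + discount * maxnextq)

# Toy usage with made-up discretised states; simpleqlearning.py additionally writes 0
# directly into the entry for the transition that reaches the goal instead of bootstrapping.
state, action, reward, next_state = (3, 5), 2, -1.0, (4, 5)
q_update(q_table, state, action, reward, next_state)
```

The same update appears per-agent in `interactive_tmp.py`, with one table per policy and the reward taken from `env.step(act_n)`.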