Skip to content

Commit 9208cb0

Browse files
Summer OSPP project 210370190 mid-term report (#449)
* update nfspagent's behavior of PreEpisodeStage * upload the draft mid-term blog * update files * correct the spelling * update report * minor modifications * update report * update the maddpg experiment * update the report * update the date and usage * update the experiment * update BC and rl_learn! in nfsp * update the report * add the figure of MADDPG Co-authored-by: Jun Tian <tianjun.cpp@gmail.com>
1 parent 64c87bc commit 9208cb0

File tree

18 files changed

+555
-455
lines changed

18 files changed

+555
-455
lines changed

docs/experiments/experiments/NFSP/JuliaRL_NFSP_KuhnPoker.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ function RL.Experiment(
118118
rng,
119119
128, # update_freq
120120
0, # initial update_step
121-
true, # initial NFSPAgent's learn mode
121+
true, # initial NFSPAgent's training mode
122122
)) for player in players(wrapped_env) if player != chance_player(wrapped_env)
123123
)
124124
)

docs/experiments/experiments/Policy Gradient/JuliaRL_MADDPG_KuhnPoker.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ function RL.Experiment(
4242
state_mapping = s -> [findfirst(==(s), state_space(env))],
4343
state_space_mapping = ss -> [[findfirst(==(s), state_space(env))] for s in state_space(env)]
4444
),
45-
## add a dummy action for the other agent.
45+
## drop the dummy action of the other agent.
4646
action_mapping = x -> length(x) == 1 ? x : Int(x[current_player(env)] + 1),
4747
)
48-
ns, na = 1, 1
49-
n_players = 2
48+
ns, na = 1, 1 # dimension of the state and action.
49+
n_players = 2 # number of players
5050

5151
init = glorot_uniform(rng)
5252

docs/homepage/blog/index.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
@def description = ""
33
@def is_enable_toc = false
44

5+
- [Implement Multi-Agent Reinforcement Learning Algorithms in Julia (Summer OSPP Project 210370190) Mid-term Report](/blog/ospp_mid-term_report_210370190)
6+
57
- [An Introduction to ReinforcementLearning.jl: Design, Implementations and Thoughts](/blog/an_introduction_to_reinforcement_learning_jl_design_implementations_thoughts)
68

79
- [Phase 1 Technical Report of Enriching Offline Reinforcement Learning Algorithms in ReinforcementLearning.jl](/blog/offline_reinforcement_learning_algorithm_phase1)
20 KB
Loading
21.8 KB
Loading
510 KB
Loading
244 KB
Loading
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
@article{DBLP:journals/corr/LoweWTHAM17,
2+
author = {Ryan Lowe and
3+
Yi Wu and
4+
Aviv Tamar and
5+
Jean Harb and
6+
Pieter Abbeel and
7+
Igor Mordatch},
8+
title = {Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments},
9+
journal = {CoRR},
10+
volume = {abs/1706.02275},
11+
year = {2017},
12+
url = {http://arxiv.org/abs/1706.02275},
13+
archivePrefix = {arXiv},
14+
eprint = {1706.02275},
15+
timestamp = {Mon, 13 Aug 2018 16:47:09 +0200},
16+
biburl = {https://dblp.org/rec/journals/corr/LoweWTHAM17.bib},
17+
bibsource = {dblp computer science bibliography, https://dblp.org}
18+
}
19+
@article{DBLP:journals/corr/FoersterFANW17,
20+
author = {Jakob N. Foerster and
21+
Gregory Farquhar and
22+
Triantafyllos Afouras and
23+
Nantas Nardelli and
24+
Shimon Whiteson},
25+
title = {Counterfactual Multi-Agent Policy Gradients},
26+
journal = {CoRR},
27+
volume = {abs/1705.08926},
28+
year = {2017},
29+
url = {http://arxiv.org/abs/1705.08926},
30+
archivePrefix = {arXiv},
31+
eprint = {1705.08926},
32+
timestamp = {Mon, 13 Aug 2018 16:47:20 +0200},
33+
biburl = {https://dblp.org/rec/journals/corr/FoersterFANW17.bib},
34+
bibsource = {dblp computer science bibliography, https://dblp.org}
35+
}
36+
@article{DBLP:journals/corr/HeinrichS16,
37+
author = {Johannes Heinrich and
38+
David Silver},
39+
title = {Deep Reinforcement Learning from Self-Play in Imperfect-Information
40+
Games},
41+
journal = {CoRR},
42+
volume = {abs/1603.01121},
43+
year = {2016},
44+
url = {http://arxiv.org/abs/1603.01121},
45+
archivePrefix = {arXiv},
46+
eprint = {1603.01121},
47+
timestamp = {Mon, 13 Aug 2018 16:47:19 +0200},
48+
biburl = {https://dblp.org/rec/journals/corr/HeinrichS16.bib},
49+
bibsource = {dblp computer science bibliography, https://dblp.org}
50+
}
51+
@article{DBLP:journals/corr/abs-1909-12823,
52+
author = {Paul Muller and
53+
Shayegan Omidshafiei and
54+
Mark Rowland and
55+
Karl Tuyls and
56+
Julien P{\'{e}}rolat and
57+
Siqi Liu and
58+
Daniel Hennes and
59+
Luke Marris and
60+
Marc Lanctot and
61+
Edward Hughes and
62+
Zhe Wang and
63+
Guy Lever and
64+
Nicolas Heess and
65+
Thore Graepel and
66+
R{\'{e}}mi Munos},
67+
title = {A Generalized Training Approach for Multiagent Learning},
68+
journal = {CoRR},
69+
volume = {abs/1909.12823},
70+
year = {2019},
71+
url = {http://arxiv.org/abs/1909.12823},
72+
archivePrefix = {arXiv},
73+
eprint = {1909.12823},
74+
timestamp = {Fri, 09 Oct 2020 09:46:58 +0200},
75+
biburl = {https://dblp.org/rec/journals/corr/abs-1909-12823.bib},
76+
bibsource = {dblp computer science bibliography, https://dblp.org}
77+
}
78+
@article{DBLP:journals/corr/abs-1903-05614,
79+
author = {Edward Lockhart and
80+
Marc Lanctot and
81+
Julien P{\'{e}}rolat and
82+
Jean{-}Baptiste Lespiau and
83+
Dustin Morrill and
84+
Finbarr Timbers and
85+
Karl Tuyls},
86+
title = {Computing Approximate Equilibria in Sequential Adversarial Games by
87+
Exploitability Descent},
88+
journal = {CoRR},
89+
volume = {abs/1903.05614},
90+
year = {2019},
91+
url = {http://arxiv.org/abs/1903.05614},
92+
archivePrefix = {arXiv},
93+
eprint = {1903.05614},
94+
timestamp = {Sun, 31 Mar 2019 19:01:24 +0200},
95+
biburl = {https://dblp.org/rec/journals/corr/abs-1903-05614.bib},
96+
bibsource = {dblp computer science bibliography, https://dblp.org}
97+
}
98+
@article{1406126,
99+
author={Shamma, J.S. and Arslan, G.},
100+
journal={IEEE Transactions on Automatic Control},
101+
title={Dynamic fictitious play, dynamic gradient play, and distributed convergence to Nash equilibria},
102+
year={2005},
103+
volume={50},
104+
number={3},
105+
pages={312-327},
106+
doi={10.1109/TAC.2005.843878}
107+
}
108+
@article{DBLP:journals/corr/abs-2104-10845,
109+
author = {Yuxuan Chen and
110+
Li Zhang and
111+
Shijian Li and
112+
Gang Pan},
113+
title = {Optimize Neural Fictitious Self-Play in Regret Minimization Thinking},
114+
journal = {CoRR},
115+
volume = {abs/2104.10845},
116+
year = {2021},
117+
url = {https://arxiv.org/abs/2104.10845},
118+
archivePrefix = {arXiv},
119+
eprint = {2104.10845},
120+
timestamp = {Tue, 27 Apr 2021 14:34:45 +0200},
121+
biburl = {https://dblp.org/rec/journals/corr/abs-2104-10845.bib},
122+
bibsource = {dblp computer science bibliography, https://dblp.org}
123+
}
124+
@article{8846699,
125+
author={Qie, Han and Shi, Dianxi and Shen, Tianlong and Xu, Xinhai and Li, Yuan and Wang, Liujing},
126+
journal={IEEE Access},
127+
title={Joint Optimization of Multi-UAV Target Assignment and Path Planning Based on Multi-Agent Reinforcement Learning},
128+
year={2019},
129+
volume={7},
130+
number={},
131+
pages={146264-146272},
132+
doi={10.1109/ACCESS.2019.2943253}
133+
}

0 commit comments

Comments
 (0)