Skip to content

Commit 9208cb0

Browse files
Summer OSPP project 210370190 mid-term report (#449)
* update nfspagent's behavior of PreEpisodeStage * upload the draft mid-term blog * update files * correct the spelling * update report * minor modifications * update report * update the maddpg experiment * update the report * update the date and usage * update the experiment * update BC and rl_learn! in nfsp * update the report * add the figure of MADDPG Co-authored-by: Jun Tian <tianjun.cpp@gmail.com>
1 parent 64c87bc commit 9208cb0

File tree

18 files changed

+555
-455
lines changed

18 files changed

+555
-455
lines changed

docs/experiments/experiments/NFSP/JuliaRL_NFSP_KuhnPoker.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ function RL.Experiment(
118118
rng,
119119
128, # update_freq
120120
0, # initial update_step
121-
true, # initial NFSPAgent's learn mode
121+
true, # initial NFSPAgent's training mode
122122
)) for player in players(wrapped_env) if player != chance_player(wrapped_env)
123123
)
124124
)

docs/experiments/experiments/Policy Gradient/JuliaRL_MADDPG_KuhnPoker.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ function RL.Experiment(
4242
state_mapping = s -> [findfirst(==(s), state_space(env))],
4343
state_space_mapping = ss -> [[findfirst(==(s), state_space(env))] for s in state_space(env)]
4444
),
45-
## add a dummy action for the other agent.
45+
## drop the dummy action of the other agent.
4646
action_mapping = x -> length(x) == 1 ? x : Int(x[current_player(env)] + 1),
4747
)
48-
ns, na = 1, 1
49-
n_players = 2
48+
ns, na = 1, 1 # dimension of the state and action.
49+
n_players = 2 # number of players
5050

5151
init = glorot_uniform(rng)
5252

docs/homepage/blog/index.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
@def description = ""
33
@def is_enable_toc = false
44

5+
- [Implement Multi-Agent Reinforcement Learning Algorithms in Julia (Summer OSPP Project 210370190) Mid-term Report](/blog/ospp_mid-term_report_210370190)
6+
57
- [An Introduction to ReinforcementLearning.jl: Design, Implementations and Thoughts](/blog/an_introduction_to_reinforcement_learning_jl_design_implementations_thoughts)
68

79
- [Phase 1 Technical Report of Enriching Offline Reinforcement Learning Algorithms in ReinforcementLearning.jl](/blog/offline_reinforcement_learning_algorithm_phase1)
20 KB
Loading
21.8 KB
Loading
510 KB
Loading
244 KB
Loading
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
@article{DBLP:journals/corr/LoweWTHAM17,
2+
author = {Ryan Lowe and
3+
Yi Wu and
4+
Aviv Tamar and
5+
Jean Harb and
6+
Pieter Abbeel and
7+
Igor Mordatch},
8+
title = {Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments},
9+
journal = {CoRR},
10+
volume = {abs/1706.02275},
11+
year = {2017},
12+
url = {http://arxiv.org/abs/1706.02275},
13+
archivePrefix = {arXiv},
14+
eprint = {1706.02275},
15+
timestamp = {Mon, 13 Aug 2018 16:47:09 +0200},
16+
biburl = {https://dblp.org/rec/journals/corr/LoweWTHAM17.bib},
17+
bibsource = {dblp computer science bibliography, https://dblp.org}
18+
}
19+
@article{DBLP:journals/corr/FoersterFANW17,
20+
author = {Jakob N. Foerster and
21+
Gregory Farquhar and
22+
Triantafyllos Afouras and
23+
Nantas Nardelli and
24+
Shimon Whiteson},
25+
title = {Counterfactual Multi-Agent Policy Gradients},
26+
journal = {CoRR},
27+
volume = {abs/1705.08926},
28+
year = {2017},
29+
url = {http://arxiv.org/abs/1705.08926},
30+
archivePrefix = {arXiv},
31+
eprint = {1705.08926},
32+
timestamp = {Mon, 13 Aug 2018 16:47:20 +0200},
33+
biburl = {https://dblp.org/rec/journals/corr/FoersterFANW17.bib},
34+
bibsource = {dblp computer science bibliography, https://dblp.org}
35+
}
36+
@article{DBLP:journals/corr/HeinrichS16,
37+
author = {Johannes Heinrich and
38+
David Silver},
39+
title = {Deep Reinforcement Learning from Self-Play in Imperfect-Information
40+
Games},
41+
journal = {CoRR},
42+
volume = {abs/1603.01121},
43+
year = {2016},
44+
url = {http://arxiv.org/abs/1603.01121},
45+
archivePrefix = {arXiv},
46+
eprint = {1603.01121},
47+
timestamp = {Mon, 13 Aug 2018 16:47:19 +0200},
48+
biburl = {https://dblp.org/rec/journals/corr/HeinrichS16.bib},
49+
bibsource = {dblp computer science bibliography, https://dblp.org}
50+
}
51+
@article{DBLP:journals/corr/abs-1909-12823,
52+
author = {Paul Muller and
53+
Shayegan Omidshafiei and
54+
Mark Rowland and
55+
Karl Tuyls and
56+
Julien P{\'{e}}rolat and
57+
Siqi Liu and
58+
Daniel Hennes and
59+
Luke Marris and
60+
Marc Lanctot and
61+
Edward Hughes and
62+
Zhe Wang and
63+
Guy Lever and
64+
Nicolas Heess and
65+
Thore Graepel and
66+
R{\'{e}}mi Munos},
67+
title = {A Generalized Training Approach for Multiagent Learning},
68+
journal = {CoRR},
69+
volume = {abs/1909.12823},
70+
year = {2019},
71+
url = {http://arxiv.org/abs/1909.12823},
72+
archivePrefix = {arXiv},
73+
eprint = {1909.12823},
74+
timestamp = {Fri, 09 Oct 2020 09:46:58 +0200},
75+
biburl = {https://dblp.org/rec/journals/corr/abs-1909-12823.bib},
76+
bibsource = {dblp computer science bibliography, https://dblp.org}
77+
}
78+
@article{DBLP:journals/corr/abs-1903-05614,
79+
author = {Edward Lockhart and
80+
Marc Lanctot and
81+
Julien P{\'{e}}rolat and
82+
Jean{-}Baptiste Lespiau and
83+
Dustin Morrill and
84+
Finbarr Timbers and
85+
Karl Tuyls},
86+
title = {Computing Approximate Equilibria in Sequential Adversarial Games by
87+
Exploitability Descent},
88+
journal = {CoRR},
89+
volume = {abs/1903.05614},
90+
year = {2019},
91+
url = {http://arxiv.org/abs/1903.05614},
92+
archivePrefix = {arXiv},
93+
eprint = {1903.05614},
94+
timestamp = {Sun, 31 Mar 2019 19:01:24 +0200},
95+
biburl = {https://dblp.org/rec/journals/corr/abs-1903-05614.bib},
96+
bibsource = {dblp computer science bibliography, https://dblp.org}
97+
}
98+
@article{1406126,
99+
author={Shamma, J.S. and Arslan, G.},
100+
journal={IEEE Transactions on Automatic Control},
101+
title={Dynamic fictitious play, dynamic gradient play, and distributed convergence to Nash equilibria},
102+
year={2005},
103+
volume={50},
104+
number={3},
105+
pages={312-327},
106+
doi={10.1109/TAC.2005.843878}
107+
}
108+
@article{DBLP:journals/corr/abs-2104-10845,
109+
author = {Yuxuan Chen and
110+
Li Zhang and
111+
Shijian Li and
112+
Gang Pan},
113+
title = {Optimize Neural Fictitious Self-Play in Regret Minimization Thinking},
114+
journal = {CoRR},
115+
volume = {abs/2104.10845},
116+
year = {2021},
117+
url = {https://arxiv.org/abs/2104.10845},
118+
archivePrefix = {arXiv},
119+
eprint = {2104.10845},
120+
timestamp = {Tue, 27 Apr 2021 14:34:45 +0200},
121+
biburl = {https://dblp.org/rec/journals/corr/abs-2104-10845.bib},
122+
bibsource = {dblp computer science bibliography, https://dblp.org}
123+
}
124+
@article{8846699,
125+
author={Qie, Han and Shi, Dianxi and Shen, Tianlong and Xu, Xinhai and Li, Yuan and Wang, Liujing},
126+
journal={IEEE Access},
127+
title={Joint Optimization of Multi-UAV Target Assignment and Path Planning Based on Multi-Agent Reinforcement Learning},
128+
year={2019},
129+
volume={7},
130+
number={},
131+
pages={146264-146272},
132+
doi={10.1109/ACCESS.2019.2943253}
133+
}

0 commit comments

Comments
 (0)