Skip to content

Commit 61eef70

Browse files
committed
minor fix
1 parent 880b2f2 commit 61eef70

24 files changed

+98
-97
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,5 +2,5 @@ FROM julia:1.1
22

33
ADD . /RLIntro
44
WORKDIR /RLIntro
5-
RUN ["julia", "-e", "using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate(); pkg\"precompile\""]
5+
RUN ["julia", "-e", "using Pkg; Pkg.Registry.add(\"General\"); Pkg.Registry.add(RegistrySpec(url = \"https://github.com/Ju-jl/Registry.git\")); Pkg.add(\"Plots\"); Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate(); pkg\"precompile\""]
66
CMD ["julia"]

src/RLIntro.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ include("chapter13/chapter13.jl")
2222
function plot_all(fig_dir=".")
2323
for f in names(RLIntro)
2424
if startswith(string(f), "fig")
25-
@eval $f()
25+
@eval $f($fig_dir)
2626
end
2727
end
2828
end

src/chapter02/chapter02.jl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
@reexport module Chapter02TenArmedTestbed
22

3-
export fig_2_1, fig_2_2, fig_2_3, fig_2_4, fig_2_5, fig_2_6
3+
# export fig_2_1
4+
export fig_2_2, fig_2_3, fig_2_4, fig_2_5, fig_2_6
45

56
include("ten_armed_testbed.jl")
67

src/chapter02/ten_armed_testbed.jl

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,59 +25,59 @@ end
2525

2626
##############################
2727

28-
# function fig_2_1()
28+
# function fig_2_1(fig_dir=".")
2929
# env = MultiArmBanditsEnv()
3030
# f = render(env)
31-
# savefig(f, "figure_2_1.png")
31+
# savefig(f, joinpath(fig_dir, "figure_2_1.png"))
3232
# f
3333
# end
3434

3535

36-
function fig_2_2()
36+
function fig_2_2(fig_dir=".")
3737
learner(ϵ) = QLearner(TabularQ(1, 10), EpsilonGreedySelector(ϵ), 0., cached_inverse_decay())
3838
p = plot(layout=(2, 1), dpi=200)
3939
for ϵ in [0.1, 0.01, 0.0]
4040
stats = [bandit_testbed(learner(ϵ)) for _ in 1:2000]
4141
plot!(p, mean(x[1] for x in stats), subplot=1, legend=:bottomright, label="epsilon=")
4242
plot!(p, mean(x[2] for x in stats), subplot=2, legend=:bottomright, label="epsilon=")
4343
end
44-
savefig(p, "figure_2_2.png")
44+
savefig(p, joinpath(fig_dir, "figure_2_2.png"))
4545
p
4646
end
4747

48-
function fig_2_3()
48+
function fig_2_3(fig_dir=".")
4949
learner1() = QLearner(TabularQ(1, 10, 5.), EpsilonGreedySelector(0.0), 0., 0.1)
5050
learner2() = QLearner(TabularQ(1, 10), EpsilonGreedySelector(0.1), 0., 0.1)
5151
p = plot(legend=:bottomright, dpi=200)
5252
plot!(p, mean(bandit_testbed(learner1())[2] for _ in 1:2000), label="Q_1=5, epsilon=0.")
5353
plot!(p, mean(bandit_testbed(learner2())[2] for _ in 1:2000), label="Q_1=0, epsilon=0.1")
54-
savefig(p, "figure_2_3.png")
54+
savefig(p, joinpath(fig_dir, "figure_2_3.png"))
5555
p
5656
end
5757

58-
function fig_2_4()
58+
function fig_2_4(fig_dir=".")
5959
learner1() = QLearner(TabularQ(1, 10), UpperConfidenceBound(10), 0., 0.1)
6060
learner2() = QLearner(TabularQ(1, 10), EpsilonGreedySelector(0.1), 0., 0.1)
6161
p = plot(legend=:bottomright, dpi=200)
6262
plot!(p, mean(bandit_testbed(learner1())[1] for _ in 1:2000), label="UpperConfidenceBound, c=2")
6363
plot!(p, mean(bandit_testbed(learner2())[1] for _ in 1:2000), label="epsilon-greedy, epsilon=0.1")
64-
savefig(p, "figure_2_4.png")
64+
savefig(p, joinpath(fig_dir, "figure_2_4.png"))
6565
p
6666
end
6767

68-
function fig_2_5()
68+
function fig_2_5(fig_dir=".")
6969
learner(alpha, baseline) = GradientBanditLearner(TabularQ(1, 10), WeightedSample(), alpha, baseline)
7070
truevalue = 4.0
7171
p = plot(legend=:bottomright, dpi=200)
7272
plot!(p, mean(bandit_testbed(learner(0.1, sample_avg()), truevalue)[2] for _ in 1:2000), label="alpha = 0.1, with baseline")
7373
plot!(p, mean(bandit_testbed(learner(0.4, sample_avg()), truevalue)[2] for _ in 1:2000), label="alpha = 0.4, with baseline")
7474
plot!(p, mean(bandit_testbed(learner(0.1, 0.), truevalue)[2] for _ in 1:2000), label="alpha = 0.1, without baseline")
7575
plot!(p, mean(bandit_testbed(learner(0.4, 0.), truevalue)[2] for _ in 1:2000), label="alpha = 0.4, without baseline")
76-
savefig(p, "figure_2_5.png")
76+
savefig(p, joinpath(fig_dir, "figure_2_5.png"))
7777
p
7878
end
7979

80-
function fig_2_6()
80+
function fig_2_6(fig_dir=".")
8181
ϵ_greedy_learner(ϵ) = QLearner(TabularQ(1, 10), EpsilonGreedySelector(ϵ), 0., cached_inverse_decay())
8282
gradient_learner(alpha) = GradientBanditLearner(TabularQ(1, 10), WeightedSample(), alpha, sample_avg())
8383
UpperConfidenceBound_learner(c) = QLearner(TabularQ(1, 10), UpperConfidenceBound(10, c), 0., cached_inverse_decay())
@@ -88,6 +88,6 @@ function fig_2_6()
8888
plot!(p, -5:1, [mean(mean(bandit_testbed(gradient_learner(2.0^i))[1] for _ in 1:2000)) for i in -5:1], label="gradient")
8989
plot!(p, -4:2, [mean(mean(bandit_testbed(UpperConfidenceBound_learner(2.0^i))[1] for _ in 1:2000)) for i in -4:2], label="UCB")
9090
plot!(p, -2:2, [mean(mean(bandit_testbed(greedy_with_init_learner(2.0^i))[1] for _ in 1:2000)) for i in -2:2], label="greedy with initialization")
91-
savefig(p, "figure_2_6.png")
91+
savefig(p, joinpath(fig_dir, "figure_2_6.png"))
9292
p
9393
end

src/chapter03/grid_world.jl

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,18 +30,18 @@ const GridWorldActions = [CartesianIndex(-1, 0),
3030

3131
const GridWorldEnvModel = DeterministicDistributionModel([nextstep(GridWorldCartesianIndices[s], a) for s in 1:25, a in GridWorldActions])
3232

33-
function fig_3_2()
33+
function fig_3_2(fig_dir=".")
3434
V, π = TabularV(25), RandomPolicy(fill(0.25, 25, 4))
3535
policy_evaluation!(V, π, GridWorldEnvModel)
3636
p = heatmap(1:5, 1:5, reshape(V.table, 5,5), yflip=true)
37-
savefig(p, "figure_3_2.png")
37+
savefig(p, joinpath(fig_dir, "figure_3_2.png"))
3838
p
3939
end
4040

41-
function fig_3_5()
41+
function fig_3_5(fig_dir=".")
4242
V, π = TabularV(25), DeterministicPolicy(rand(1:4, 25), 4)
4343
policy_iteration!(V, π, GridWorldEnvModel)
4444
p = heatmap(1:5, 1:5, reshape(V.table, 5,5), yflip=true)
45-
savefig(p, "figure_3_5.png")
45+
savefig(p, joinpath(fig_dir, "figure_3_5.png"))
4646
p
4747
end

src/chapter04/car_rental.jl

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,12 +48,12 @@ end
4848

4949
const CarRentalEnvModel = DeterministicDistributionModel([nextstep(s, a) for s in 1:(MaxCars+1)^2, a in 1:length(Actions)])
5050

51-
function fig_4_2(max_iter=100)
51+
function fig_4_2(fig_dir=".", max_iter=100)
5252
V, π = TabularV((1+MaxCars)^2), DeterministicPolicy(zeros(Int,21^2), length(Actions))
5353
policy_iteration!(V, π, CarRentalEnvModel; γ=0.9, max_iter=max_iter)
5454
p1 = heatmap(0:MaxCars, 0:MaxCars, reshape([decode_action(x) for x in π.table], 1+MaxCars,1+MaxCars))
55-
savefig(p1, "figure_4_2_policy.png")
55+
savefig(p1, joinpath(fig_dir, "figure_4_2_policy.png"))
5656
p2 = heatmap(0:MaxCars, 0:MaxCars, reshape(V.table, 1+MaxCars,1+MaxCars))
57-
savefig(p2, "figure_4_2_value.png")
57+
savefig(p2, joinpath(fig_dir, "figure_4_2_value.png"))
5858
p1, p2
5959
end

src/chapter04/gambler_problem.jl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,10 +26,10 @@ end
2626

2727
const GamblerProblemEnvModel = DeterministicDistributionModel([nextstep(s, a) for s in 1:(WinCapital+1), a in 1:WinCapital])
2828

29-
function fig_4_3(max_iter=typemax(Int))
29+
function fig_4_3(fig_dir=".", max_iter=typemax(Int))
3030
V = TabularV(1+WinCapital)
3131
value_iteration!(V, GamblerProblemEnvModel; γ=1.0, max_iter=max_iter)
3232
p = plot(V.table[2:end-1])
33-
savefig(p, "figure_4_3.png")
33+
savefig(p, joinpath(fig_dir, "figure_4_3.png"))
3434
p
3535
end

src/chapter04/grid_world.jl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,10 @@ const GridWorldActions = [CartesianIndex(-1, 0),
2525

2626
const GridWorldEnvModel = DeterministicDistributionModel([nextstep(GridWorldCartesianIndices[s], a) for s in 1:16, a in GridWorldActions])
2727

28-
function fig_4_1()
28+
function fig_4_1(fig_dir=".")
2929
V, π = TabularV(16), RandomPolicy(fill(0.25, 16, 4))
3030
policy_evaluation!(V, π, GridWorldEnvModel; γ=1.0)
3131
p = heatmap(1:4, 1:4, reshape(V.table, 4,4), yflip=true)
32-
savefig(p, "figure_4_1.png")
32+
savefig(p, joinpath(fig_dir, "figure_4_1.png"))
3333
p
3434
end

src/chapter05/blackjack.jl

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ const player_policy = begin
1818
DeterministicPolicy(table, 2)
1919
end
2020

21-
function fig_5_1(n=10000)
21+
function fig_5_1(fig_dir=".", n=10000)
2222
agent = Agent(MonteCarloLearner(TabularV(length(Indices)), player_policy, 1.0),
2323
EpisodeSARDBuffer(),
2424
preprocessor)
@@ -29,13 +29,13 @@ function fig_5_1(n=10000)
2929
for dealer_card in 2:11, player_sum in 11:21]
3030
p1 = heatmap(usable_ace_values)
3131
p2 = heatmap(no_usable_ace_values)
32-
savefig(p1, "figure_5_1_usable_ace_n_$n.png")
33-
savefig(p2, "figure_5_1_no_usable_ace_n_$n.png")
32+
savefig(p1, joinpath(fig_dir, "figure_5_1_usable_ace_n_$n.png"))
33+
savefig(p2, joinpath(fig_dir, "figure_5_1_no_usable_ace_n_$n.png"))
3434
p1, p2
3535
end
3636

3737
"TODO: WARNING!!! result is not the same with the implementation by Python"
38-
function fig_5_2(n=1000000)
38+
function fig_5_2(fig_dir=".", n=1000000)
3939
agent = Agent(MonteCarloExploringStartLearner(TabularQ(length(Indices), length(actionspace(BlackJackEnv))),
4040
player_policy,
4141
RandomPolicy(fill(0.5, length(Indices), length(actionspace(BlackJackEnv)))),
@@ -56,14 +56,14 @@ function fig_5_2(n=1000000)
5656
p2 = heatmap(no_usable_ace_values)
5757
p3 = heatmap(usable_ace_policy)
5858
p4 = heatmap(no_usable_ace_policy)
59-
savefig(p1, "figure_5_2_usable_ace_n_$n.png")
60-
savefig(p2, "figure_5_2_no_usable_ace_n_$n.png")
61-
savefig(p3, "figure_5_2_usable_ace_policy_n_$n.png")
62-
savefig(p4, "figure_5_2_no_usable_ace_policy_n_$n.png")
59+
savefig(p1, joinpath(fig_dir, "figure_5_2_usable_ace_n_$n.png"))
60+
savefig(p2, joinpath(fig_dir, "figure_5_2_no_usable_ace_n_$n.png"))
61+
savefig(p3, joinpath(fig_dir, "figure_5_2_usable_ace_policy_n_$n.png"))
62+
savefig(p4, joinpath(fig_dir, "figure_5_2_no_usable_ace_policy_n_$n.png"))
6363
p1, p2, p3, p4
6464
end
6565

66-
function fig_5_3(n=10000)
66+
function fig_5_3(fig_dir=".", n=10000)
6767
init_internal_state = [1, 13, 2]
6868
s = preprocessor(BlackJack.encode(init_internal_state...))
6969

@@ -93,6 +93,6 @@ function fig_5_3(n=10000)
9393
end
9494
p = plot(mean((run() .- (-0.27726)).^2 for _ in 1:100), label="Weighted Importance Sampling")
9595
p = plot!(p, mean((run(:OrdinaryImportanceSampling) .- (-0.27726)).^2 for _ in 1:100), xscale=:log10, label="Ordinary Importance Sampling")
96-
savefig(p, "figure_5_3.png")
96+
savefig(p, joinpath(fig_dir, "figure_5_3.png"))
9797
p
9898
end

src/chapter05/leftright.jl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ gr()
66

77

88

9-
function fig_5_4()
9+
function fig_5_4(fig_dir=".")
1010
function value_collect()
1111
values = []
1212
function f(env, agent)
@@ -31,6 +31,6 @@ function fig_5_4()
3131
train!(LeftRightEnv(), agent; callbacks = callbacks)
3232
plot!(p, callbacks[2](), xscale = :log10)
3333
end
34-
savefig(p, "figure_5_4.png")
34+
savefig(p, joinpath(fig_dir, "figure_5_4.png"))
3535
p
3636
end

0 commit comments

Comments
 (0)