Add tests for NStepBatchSampler; define SSLART and fix n-step sampling

findmyway · findmyway · commit 1ff0d8fb46b9 · 2022-06-10T12:40:36.000+08:00
diff --git a/src/common/common.jl b/src/common/common.jl
@@ -8,6 +8,7 @@ const AA = (:action, :next_action)
 const RT = (:reward, :terminal)
 const SSART = (SS..., :action, RT...)
 const SSAART = (SS..., AA..., RT...)
+const SSLART = (SS..., :legal_actions_mask, :action, RT...)
 const SSLLAART = (SS..., LL..., AA..., RT...)
 
 include("sum_tree.jl")
diff --git a/src/normalization.jl b/src/normalization.jl
@@ -15,7 +15,7 @@ end
 
 #Treats last dim as batch dim
 function OnlineStats.fit!(n::Normalizer, data::AbstractArray)
-    for d in eachslice(data, dims = ndims(data))
+    for d in eachslice(data, dims=ndims(data))
         fit!(n.os, vec(d))
     end
     n
@@ -72,17 +72,17 @@ function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector)
     return (x .- m) ./ s
 end
 
-function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray) 
+function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray)
     xn = similar(x)
-    for (i, slice) in enumerate(eachslice(x, dims = ndims(x)))
-        xn[repeat([:], ndims(x)-1)..., i] .= reshape(normalize(os, vec(slice)), size(x)[1:end-1]...) 
+    for (i, slice) in enumerate(eachslice(x, dims=ndims(x)))
+        xn[repeat([:], ndims(x) - 1)..., i] .= reshape(normalize(os, vec(slice)), size(x)[1:end-1]...)
     end
     return xn
 end
 
 function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector{<:AbstractArray})
     xn = similar(x)
-    for (i,el) in enumerate(x)
+    for (i, el) in enumerate(x)
         xn[i] = normalize(os, vec(el))
     end
     return xn
@@ -96,7 +96,7 @@ have equal weights in the computation of the moments.
 See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/) 
 to use variants such as exponential weights to favor the most recent observations.
 """
-scalar_normalizer(; weight::Weight = EqualWeight()) = Normalizer(Moments(weight = weight))
+scalar_normalizer(; weight::Weight=EqualWeight()) = Normalizer(Moments(weight=weight))
 
 """
     array_normalizer(size::Tuple{Int}; weights = OnlineStats.EqualWeight())
@@ -108,7 +108,7 @@ By default, all samples have equal weights in the computation of the moments.
 See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/) 
 to use variants such as exponential weights to favor the most recent observations.
 """
-array_normalizer(size::NTuple{N,Int}; weight::Weight = EqualWeight()) where N = Normalizer(Group([Moments(weight = weight) for _ in 1:prod(size)]))
+array_normalizer(size::NTuple{N,Int}; weight::Weight=EqualWeight()) where {N} = Normalizer(Group([Moments(weight=weight) for _ in 1:prod(size)]))
 
 """
     NormalizedTraces(traces::AbstractTraces, normalizers::NamedTuple)
@@ -142,12 +142,12 @@ traj = Trajectory(
 )
 ```
 """
-struct NormalizedTraces{names, TT, T <: AbstractTraces{names, TT}, normnames, N} <: AbstractTraces{names, TT}
+struct NormalizedTraces{names,TT,T<:AbstractTraces{names,TT},normnames,N} <: AbstractTraces{names,TT}
     traces::T
-    normalizers::NamedTuple{normnames, N}
-end 
+    normalizers::NamedTuple{normnames,N}
+end
 
-function NormalizedTraces(traces::AbstractTraces{names, TT}; trace_normalizer_pairs...) where names where TT
+function NormalizedTraces(traces::AbstractTraces{names,TT}; trace_normalizer_pairs...) where {names} where {TT}
     for key in keys(trace_normalizer_pairs)
         @assert key in keys(traces) "Traces do not have key $key, valid keys are $(keys(traces))."
     end
@@ -160,11 +160,11 @@ function NormalizedTraces(traces::AbstractTraces{names, TT}; trace_normalizer_pa
             else #if not then one is missing
                 present_key = only(intersect(keys(trace), keys(trace_normalizer_pairs)))
                 absent_key = only(setdiff(keys(trace), keys(trace_normalizer_pairs)))
-                nt = merge(nt, (;(absent_key => nt[present_key],)...)) #assign the same normalizer
+                nt = merge(nt, (; (absent_key => nt[present_key],)...)) #assign the same normalizer
             end
         end
     end
-    NormalizedTraces{names, TT, typeof(traces), keys(nt), typeof(values(nt))}(traces, nt)
+    NormalizedTraces{names,TT,typeof(traces),keys(nt),typeof(values(nt))}(traces, nt)
 end
 
 function Base.show(io::IO, ::MIME"text/plain", t::NormalizedTraces{names,T}) where {names,T}
@@ -193,6 +193,6 @@ end
 
 function sample(s::BatchSampler, nt::NormalizedTraces, names)
     inds = rand(s.rng, 1:length(nt), s.batch_size)
-    maybe_normalize(data, key) = key in keys(nt.normalizers) ? normalize(nt.normalizers[key], data) : data 
-    NamedTuple{names}(s.transformer(maybe_normalize(nt[x][inds], x) for x in names))
+    maybe_normalize(data, key) = key in keys(nt.normalizers) ? normalize(nt.normalizers[key], data) : data
+    NamedTuple{names}(maybe_normalize(nt[x][inds], x) for x in names)
 end
diff --git a/src/samplers.jl b/src/samplers.jl
@@ -91,49 +91,49 @@ sample(m::MultiBatchSampler, t) = [sample(m.sampler, t) for _ in 1:m.n]
 
 export NStepBatchSampler
 
-Base.@kwdef mutable struct NStepBatchSampler{traces}
+mutable struct NStepBatchSampler{traces}
     n::Int # !!! n starts from 1
     γ::Float32
-    batch_size::Int = 32
-    stack_size::Union{Nothing,Int} = nothing
-    rng::Any = Random.GLOBAL_RNG
+    batch_size::Int
+    stack_size::Union{Nothing,Int}
+    rng::Any
 end
 
-select_last_dim(xs::AbstractArray{T,N}, inds) where {T,N} = @views xs[ntuple(_ -> (:), Val(N - 1))..., inds]
-select_last_frame(xs::AbstractArray{T,N}) where {T,N} = select_last_dim(xs, size(xs, N))
-
-consecutive_view(cb, inds; n_stack=nothing, n_horizon=nothing) = consecutive_view(cb, inds, n_stack, n_horizon)
-consecutive_view(cb, inds, ::Nothing, ::Nothing) = select_last_dim(cb, inds)
-consecutive_view(cb, inds, n_stack::Int, ::Nothing) = select_last_dim(cb, [x + i for i in -n_stack+1:0, x in inds])
-consecutive_view(cb, inds, ::Nothing, n_horizon::Int) = select_last_dim(cb, [x + j for j in 0:n_horizon-1, x in inds])
-consecutive_view(cb, inds, n_stack::Int, n_horizon::Int) = select_last_dim(cb, [x + i + j for i in -n_stack+1:0, j in 0:n_horizon-1, x in inds])
+NStepBatchSampler(; kw...) = NStepBatchSampler{SSART}(; kw...)
+NStepBatchSampler{names}(; n, γ, batch_size=32, stack_size=nothing, rng=Random.GLOBAL_RNG) where {names} = NStepBatchSampler{names}(n, γ, batch_size, stack_size, rng)
 
 function sample(s::NStepBatchSampler{names}, ts) where {names}
     valid_range = isnothing(s.stack_size) ? (1:(length(ts)-s.n+1)) : (s.stack_size:(length(ts)-s.n+1))# think about the exteme case where s.stack_size == 1 and s.n == 1
     inds = rand(s.rng, valid_range, s.batch_size)
     sample(s, ts, Val(names), inds)
 end
 
-function sample(s::NStepBatchSampler, ts, ::Val{SSART}, inds)
-    s = consecutive_view(ts[:state], inds; n_stack=s.stack_size)
-    s′ = consecutive_view(ts[:next_state], inds .+ (s.n - 1); n_stack=s.stack_size)
-    a = consecutive_view(ts[:action], inds)
-    t_horizon = consecutive_view(ts[:terminal], inds; n_horizon=s.n)
-    r_horizon = consecutive_view(ts[:reward], inds; n_horizon=s.n)
+function sample(nbs::NStepBatchSampler, ts, ::Val{SSART}, inds)
+    if isnothing(nbs.stack_size)
+        s = ts[:state][inds]
+        s′ = ts[:next_state][inds.+(nbs.n-1)]
+    else
+        s = ts[:state][[x + i for i in -nbs.stack_size+1:0, x in inds]]
+        s′ = ts[:next_state][[x + nbs.n - 1 + i for i in -nbs.stack_size+1:0, x in inds]]
+    end
+
+    a = ts[:action][inds]
+    t_horizon = ts[:terminal][[x + j for j in 0:nbs.n-1, x in inds]]
+    r_horizon = ts[:reward][[x + j for j in 0:nbs.n-1, x in inds]]
 
     @assert ndims(t_horizon) == 2
-    t = any(t_horizon, dims=1)
+    t = any(t_horizon, dims=1) |> vec
 
     @assert ndims(r_horizon) == 2
     r = map(eachcol(r_horizon), eachcol(t_horizon)) do r⃗, t⃗
-        foldr((init, (rr, tt)) -> rr + f.γ * init * (1 - tt), zip(r⃗, t⃗); init=0.0f0)
+        foldr(((rr, tt), init) -> rr + nbs.γ * init * (1 - tt), zip(r⃗, t⃗); init=0.0f0)
     end
 
-    NamedTuple{names}(s, s′, a, r, t)
+    NamedTuple{SSART}((s, s′, a, r, t))
 end
 
 function sample(s::NStepBatchSampler, ts, ::Val{SSLART}, inds)
-    s, s′, a, r, t = sample(s, ts, Val(SSART), inds),
-    l = consecutive_view(ts[:legal_actions_mask], inds)
-    NamedTuple{SSLART}(s, s′, l, a, r, t)
+    s, s′, a, r, t = sample(s, ts, Val(SSART), inds)
+    l = ts[:legal_actions_mask][inds] # consecutive_view is deleted by this patch; index the trace directly, as done for :action
+    NamedTuple{SSLART}((s, s′, l, a, r, t))
 end
diff --git a/test/samplers.jl b/test/samplers.jl
@@ -58,4 +58,71 @@ end
     @test length(batches[1][:policy][:a]) == 3
     @test length(batches[1][:critic]) == 2 # we sampled 2 batches for critic
     @test length(batches[1][:critic][1][:b]) == 5 #each batch is 5 samples 
-end
+end
+
+#! format: off
+@testset "NStepSampler" begin
+    γ = 0.9
+    n_stack = 2
+    n_horizon = 3
+    batch_size = 4
+
+    t1 = MultiplexTraces{(:state, :next_state)}(1:10) +
+        MultiplexTraces{(:action, :next_action)}(iseven.(1:10)) +
+        Traces(
+            reward=1:9,
+            terminal=Bool[0, 0, 0, 1, 0, 0, 0, 0, 1],
+        )
+
+    s1 = NStepBatchSampler(n=n_horizon, γ=γ, stack_size=n_stack, batch_size=batch_size)
+
+    xs = RLTrajectories.sample(s1, t1)
+
+    @test size(xs.state) == (n_stack, batch_size)
+    @test size(xs.next_state) == (n_stack, batch_size)
+    @test size(xs.action) == (batch_size,)
+    @test size(xs.reward) == (batch_size,)
+    @test size(xs.terminal) == (batch_size,)
+
+    
+    state_size = (2,3)
+    n_state = reduce(*, state_size)
+    total_length = 10
+    t2 = MultiplexTraces{(:state, :next_state)}(
+            reshape(1:n_state * total_length, state_size..., total_length)
+        ) +
+        MultiplexTraces{(:action, :next_action)}(iseven.(1:total_length)) +
+        Traces(
+            reward=1:total_length-1,
+            terminal=Bool[0, 0, 0, 1, 0, 0, 0, 0, 1],
+        )
+
+    xs2 = RLTrajectories.sample(s1, t2)
+
+    @test size(xs2.state) == (state_size..., n_stack, batch_size)
+    @test size(xs2.next_state) == (state_size..., n_stack, batch_size)
+    @test size(xs2.action) == (batch_size,)
+    @test size(xs2.reward) == (batch_size,)
+    @test size(xs2.terminal) == (batch_size,)
+
+    inds = [3, 5, 7]
+    xs3 = RLTrajectories.sample(s1, t2, Val(SSART), inds)
+
+    @test xs3.state == cat(
+        (
+            reshape(n_state * (i-n_stack)+1: n_state * i, state_size..., n_stack)
+            for i in inds
+        )...
+        ;dims=length(state_size) + 2
+    ) 
+
+    @test xs3.next_state == xs3.state .+ (n_state * n_horizon)
+    @test xs3.action == iseven.(inds)
+    @test xs3.terminal == [any(t2[:terminal][i: i+n_horizon-1]) for i in inds]
+
+    # manual calculation
+    @test xs3.reward[1] ≈ 3 + γ * 4  # terminated at step 4
+    @test xs3.reward[2] ≈ 5 + γ * (6 + γ * 7)
+    @test xs3.reward[3] ≈ 7 + γ * (8 + γ * 9)
+end
+#! format: on