@@ -594,7 +594,7 @@ function apply!(o::InvDecay, x, Δ)
594594end
595595
596596"""
597- ExpDecay(η = 0.001, decay = 0.1, decay_step = 1000, clip = 1e-4)
597+ ExpDecay(η = 0.001, decay = 0.1, decay_step = 1000, clip = 1e-4, start = 0)
598598
599599Discount the learning rate `η` by the factor `decay` every `decay_step` steps till
600600a minimum of `clip`.
@@ -606,6 +606,7 @@ a minimum of `clip`.
606606- `decay_step`: Schedule decay operations by setting the number of steps between
607607 two decay operations.
608608- `clip`: Minimum value of learning rate.
609+ - `start`: Number of steps after which decay may begin; no decay is applied until the step count exceeds `start`.
609610
610611
611612See also the [Scheduling Optimisers](@ref) section of the docs
@@ -624,16 +625,17 @@ mutable struct ExpDecay <: AbstractOptimiser
624625 decay:: Float64
625626 step:: Int64
626627 clip:: Float64
628+ start:: Int64
627629 current:: IdDict
628630end
629631
630- ExpDecay (opt = 0.001 , decay = 0.1 , decay_step = 1000 , clip = 1e-4 ) =
631- ExpDecay (opt, decay, decay_step, clip, IdDict ())
632+ ExpDecay (opt = 0.001 , decay = 0.1 , decay_step = 1000 , clip = 1e-4 , start = 0 ) =
633+ ExpDecay (opt, decay, decay_step, clip, start, IdDict ())
632634
633635function apply! (o:: ExpDecay , x, Δ)
634- η, s, decay = o. eta, o. step, o. decay
636+ η, s, decay, start = o. eta, o. step, o. decay, o . start
635637 n = o. current[x] = get (o. current, x, 0 ) + 1
636- if o . current[x] % s == 0 && count (x -> x% s == 0 , values (o. current)) == 1
638+ if n > start && n % s == 0 && count (x -> x > start && x % s == 0 , values (o. current)) == 1
637639 η = max (η * decay, o. clip)
638640 o. eta = η
639641 end
0 commit comments