Skip to content

Commit 473e09e

Browse files
Merge pull request #17 from eriktaubeneck/master
major refactor in choice of epsilon for all finite_difference methods
2 parents d338f4a + c737736 commit 473e09e

File tree

1 file changed

+78
-28
lines changed

1 file changed

+78
-28
lines changed

src/finite_difference.jl

Lines changed: 78 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,6 @@
22
##
33
## Graveyard of Alternative Functions
44
##
5-
## In Nodedal and Wright, epsilon was calculated as
6-
##
7-
## epsilon = sqrt(eps())
8-
##
9-
## In the examples I've tested, this strategy is best when x is small
10-
## and becomes very bad when x is larger
11-
##
125
## The proper setting of epsilon depends on whether we're doing central
136
## differencing or forward differencing. See, for example, section 5.7 of
147
## Numerical Recipes.
@@ -21,28 +14,83 @@
2114
##
2215
##############################################################################
2316

17+
macro forwardrule(x, T, e)
18+
x, T, e = esc(x), esc(T), esc(e)
19+
quote
20+
$e = sqrt(eps($T)) * max(one($T), abs($x))
21+
end
22+
end
23+
24+
macro centralrule(x, T, e)
25+
x, T, e = esc(x), esc(T), esc(e)
26+
quote
27+
$e = cbrt(eps($T)) * max(one($T), abs($x))
28+
end
29+
end
30+
31+
macro hessianrule(x, T, e)
32+
x, T, e = esc(x), esc(T), esc(e)
33+
quote
34+
$e = eps($T)^(1/4) * max(one($T), abs($x))
35+
end
36+
end
37+
38+
macro complexrule(x, T, e)
39+
x, T, e = esc(x), esc(T), esc(e)
40+
quote
41+
$e = eps($x)
42+
end
43+
end
44+
2445
function finite_difference{T <: Number}(f::Function,
2546
x::T,
2647
dtype::Symbol = :central)
2748
if dtype == :forward
28-
epsilon = sqrt(eps(T))*max(one(T),abs(x))
49+
@forwardrule x T epsilon
2950
xplusdx = x + epsilon
30-
# use machine-representable numbers
3151
return (f(xplusdx) - f(x)) / epsilon
3252
elseif dtype == :central
33-
epsilon = cbrt(eps(T))*max(one(T), abs(x))
53+
@centralrule x T epsilon
3454
xplusdx, xminusdx = x + epsilon, x - epsilon
35-
# Is it better to do 2 * epsilon or xplusdx - xminusdx?
36-
return (f(xplusdx) - f(xminusdx)) / (2 * epsilon)
55+
return (f(xplusdx) - f(xminusdx)) / (epsilon + epsilon)
3756
elseif dtype == :complex
38-
epsilon = eps(x)
57+
@complexrule x T epsilon
3958
xplusdx = x + epsilon * im
4059
return imag(f(xplusdx)) / epsilon
4160
else
4261
error("dtype must be :forward, :central or :complex")
4362
end
4463
end
4564

65+
##############################################################################
66+
##
67+
## Complex Step Finite Differentiation Tools
68+
##
69+
## Martins, Sturdza, and Alonso (2003) suggest the only non-analytic
70+
## fuction of which complex step finite difference approximation
71+
## will fail and finite difference will not is abs().
72+
## They suggest redefining as follows for z = x + im*y
73+
##
74+
## if x < 0
75+
## -x - im * y
76+
## else
77+
## x + im * y
78+
##
79+
## This is provided below as complex_differentiable_abs (renaming encouraged!)
80+
##
81+
## Also, if your fuctions has control flow using < or >, you must compare
82+
## real(z) for your control flow.
83+
##
84+
##############################################################################
85+
86+
function complex_differentiable_abs{T <: Complex}(z::T)
87+
if real(z) < 0
88+
return -real(z) - im * imag(z)
89+
else
90+
return real(z) + im * imag(z)
91+
end
92+
end
93+
4694
##############################################################################
4795
##
4896
## Gradient of f: R^n -> R
@@ -63,7 +111,7 @@ function finite_difference!{S <: Number, T <: Number}(f::Function,
63111
# Establish a baseline value of f(x).
64112
f_x = f(x)
65113
for i = 1:n
66-
epsilon = sqrt(eps(max(one(T), abs(x[i]))))
114+
@forwardrule x[i] S epsilon
67115
oldx = x[i]
68116
x[i] = oldx + epsilon
69117
f_xplusdx = f(x)
@@ -72,14 +120,14 @@ function finite_difference!{S <: Number, T <: Number}(f::Function,
72120
end
73121
elseif dtype == :central
74122
for i = 1:n
75-
epsilon = cbrt(eps(max(one(T), abs(x[i]))))
123+
@centralrule x[i] S epsilon
76124
oldx = x[i]
77125
x[i] = oldx + epsilon
78126
f_xplusdx = f(x)
79127
x[i] = oldx - epsilon
80128
f_xminusdx = f(x)
81129
x[i] = oldx
82-
g[i] = (f_xplusdx - f_xminusdx) / (2 * epsilon)
130+
g[i] = (f_xplusdx - f_xminusdx) / (epsilon + epsilon)
83131
end
84132
else
85133
error("dtype must be :forward or :central")
@@ -119,7 +167,7 @@ function finite_difference_jacobian!{R <: Number,
119167
# Iterate over each dimension of the gradient separately.
120168
if dtype == :forward
121169
for i = 1:n
122-
epsilon = sqrt(eps(max(one(T), abs(x[i]))))
170+
@forwardrule x[i] R epsilon
123171
oldx = x[i]
124172
x[i] = oldx + epsilon
125173
f_xplusdx = f(x)
@@ -128,14 +176,14 @@ function finite_difference_jacobian!{R <: Number,
128176
end
129177
elseif dtype == :central
130178
for i = 1:n
131-
epsilon = cbrt(eps(max(one(T), abs(x[i]))))
179+
@centralrule x[i] R epsilon
132180
oldx = x[i]
133181
x[i] = oldx + epsilon
134182
f_xplusdx = f(x)
135183
x[i] = oldx - epsilon
136184
f_xminusdx = f(x)
137185
x[i] = oldx
138-
J[:, i] = (f_xplusdx - f_xminusdx) / (2 * epsilon)
186+
J[:, i] = (f_xplusdx - f_xminusdx) / (epsilon + epsilon)
139187
end
140188
else
141189
error("dtype must :forward or :central")
@@ -165,8 +213,9 @@ end
165213
##
166214
##############################################################################
167215

168-
function finite_difference_hessian{T <: Number}(f::Function, x::T)
169-
epsilon = eps(max(one(T), abs(x)))^(1/4)
216+
function finite_difference_hessian{T <: Number}(f::Function,
217+
x::T)
218+
@hessianrule x T epsilon
170219
(f(x + epsilon) - 2*f(x) + f(x - epsilon))/epsilon^2
171220
end
172221
function finite_difference_hessian(f::Function,
@@ -189,21 +238,22 @@ function finite_difference_hessian!{S <: Number,
189238
# What is the dimension of x?
190239
n = length(x)
191240

241+
epsilon = NaN
192242
# TODO: Remove all these copies
193243
xpp, xpm, xmp, xmm = copy(x), copy(x), copy(x), copy(x)
194244
fx = f(x)
195245
for i = 1:n
196246
xi = x[i]
197-
epsilon = eps(max(one(T), abs(x[i])))^(1/4)
247+
@hessianrule x[i] S epsilon
198248
xpp[i], xmm[i] = xi + epsilon, xi - epsilon
199249
H[i, i] = (f(xpp) - 2*fx + f(xmm)) / epsilon^2
200-
epsiloni = cbrt(eps(max(one(T), abs(x[i]))))
250+
@centralrule x[i] S epsiloni
201251
xp = xi + epsiloni
202252
xm = xi - epsiloni
203253
xpp[i], xpm[i], xmp[i], xmm[i] = xp, xp, xm, xm
204254
for j = i+1:n
205255
xj = x[j]
206-
epsilonj = cbrt(eps(max(one(T), abs(x[j]))))
256+
@centralrule x[j] S epsilonj
207257
xp = xj + epsilonj
208258
xm = xj - epsilonj
209259
xpp[j], xpm[j], xmp[j], xmm[j] = xp, xm, xp, xm
@@ -228,10 +278,10 @@ function finite_difference_hessian{T <: Number}(f::Function,
228278
# Return the Hessian
229279
return H
230280
end
231-
function finite_difference_hessian{T}(f::Function,
232-
g::Function,
233-
x::Vector{T},
234-
dtype::Symbol = :central)
281+
function finite_difference_hessian{T <: Number}(f::Function,
282+
g::Function,
283+
x::Vector{T},
284+
dtype::Symbol = :central)
235285
finite_difference_jacobian(g, x, dtype)
236286
end
237287

0 commit comments

Comments
 (0)