program quadratic_fit
  ! This program fits a quadratic function with a small neural network,
  ! trained with stochastic gradient descent, batch gradient descent, and
  ! minibatch gradient descent.
  use nf, only: dense, input, network

  implicit none
  type(network) :: net_sgd, net_batch_sgd, net_minibatch_sgd

  ! Training parameters
  integer, parameter :: num_epochs = 1000
  integer, parameter :: train_size = 1000
  integer, parameter :: test_size = 30
  integer, parameter :: batch_size = 10
  real, parameter :: learning_rate = 0.01

  ! Input and output data
  real, allocatable :: x(:), y(:) ! training data
  real, allocatable :: xtest(:), ytest(:) ! testing data
  real, allocatable :: ypred_sgd(:), ypred_batch_sgd(:), ypred_minibatch_sgd(:)

  integer :: i, n

  print '("Fitting quadratic function")'
  print '(60("="))'

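  ! Create the test dataset: evenly spaced inputs on [0, 2) and the exact
  ! function values at those points.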
  allocate(xtest(test_size), ytest(test_size))
  xtest = [(real(i - 1) * 2 / test_size, i = 1, test_size)]
  ytest = quadratic(xtest)

  ! x and y as 1-D arrays
  allocate(x(train_size), y(train_size))

  ! Generate the training dataset: uniform random x on [0, 2)
  do i = 1, train_size
    call random_number(x(i))
    x(i) = x(i) * 2
  end do
  y = quadratic(x)

  ! Instantiate a separate network for each optimization method.
  net_sgd = network([input(1), dense(3), dense(1)])
  net_batch_sgd = network([input(1), dense(3), dense(1)])
  net_minibatch_sgd = network([input(1), dense(3), dense(1)])

  ! Print network info to stdout; this will be the same for all three networks.
  call net_sgd % print_info()

  ! SGD optimizer
  call sgd_optimizer(net_sgd, x, y, learning_rate, num_epochs)

  ! Batch SGD optimizer
  call batch_gd_optimizer(net_batch_sgd, x, y, learning_rate, num_epochs)

  ! Mini-batch SGD optimizer
  call minibatch_gd_optimizer(net_minibatch_sgd, x, y, learning_rate, num_epochs, batch_size)

  ! Calculate predictions on the test set
  ypred_sgd = [(net_sgd % predict([xtest(i)]), i = 1, test_size)]
  ypred_batch_sgd = [(net_batch_sgd % predict([xtest(i)]), i = 1, test_size)]
  ypred_minibatch_sgd = [(net_minibatch_sgd % predict([xtest(i)]), i = 1, test_size)]

  ! Print the mean squared error
  print '("Stochastic gradient descent MSE: ", f9.6)', sum((ypred_sgd - ytest)**2) / size(ytest)
  print '("     Batch gradient descent MSE: ", f9.6)', sum((ypred_batch_sgd - ytest)**2) / size(ytest)
  print '(" Minibatch gradient descent MSE: ", f9.6)', sum((ypred_minibatch_sgd - ytest)**2) / size(ytest)

contains

  real elemental function quadratic(x) result(y)
    ! Quadratic function
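    ! y = (x**2 + x + 2) / 4, which maps x in [0, 2] onto y in [0.5, 2].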
    real, intent(in) :: x
    y = (x**2 / 2 + x / 2 + 1) / 2
  end function quadratic

  subroutine sgd_optimizer(net, x, y, learning_rate, num_epochs)
    ! In the stochastic gradient descent (SGD) optimizer, we run the forward
    ! and backward passes and update the weights for each training sample,
    ! one at a time.
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs
    integer :: i, n

    print *, "Running SGD optimizer..."

    do n = 1, num_epochs
      do i = 1, size(x)
        call net % forward([x(i)])
        call net % backward([y(i)])
        call net % update(learning_rate)
      end do
    end do

  end subroutine sgd_optimizer

  subroutine batch_gd_optimizer(net, x, y, learning_rate, num_epochs)
    ! Like the stochastic gradient descent (SGD) optimizer, except that here we
    ! accumulate the weight gradients for all training samples and update the
    ! weights once per epoch.
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs
    integer :: i, n

    print *, "Running batch GD optimizer..."

    do n = 1, num_epochs
      do i = 1, size(x)
        call net % forward([x(i)])
        call net % backward([y(i)])
      end do
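      ! Gradients have accumulated over all samples; dividing the learning
      ! rate by size(x) applies the average gradient for the epoch.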
      call net % update(learning_rate / size(x))
    end do

  end subroutine batch_gd_optimizer

  subroutine minibatch_gd_optimizer(net, x, y, learning_rate, num_epochs, batch_size)
    ! Like the batch GD optimizer, except that here we accumulate the weight
    ! gradients over a number of mini-batches and update the weights once per
    ! mini-batch.
    !
    ! Note: -O3 on GFortran must be accompanied with -fno-frontend-optimize for
    ! this subroutine to converge to a solution.
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs, batch_size
    integer :: i, j, n, num_samples, num_batches, start_index, end_index
    real, allocatable :: batch_x(:), batch_y(:)
    integer, allocatable :: batch_indices(:)

    print *, "Running mini-batch GD optimizer..."

    num_samples = size(x)
    num_batches = num_samples / batch_size
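    ! Note: integer division; if batch_size does not evenly divide the number
    ! of samples, the remainder samples are never visited.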

    ! Generate shuffled indices for the mini-batches
    allocate(batch_x(batch_size), batch_y(batch_size))
    allocate(batch_indices(num_batches))

    do j = 1, num_batches
      batch_indices(j) = (j - 1) * batch_size + 1
    end do

    call shuffle(batch_indices)
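    ! The mini-batches remain contiguous slices of the training data; only the
    ! order in which they are visited is randomized, and only once, before the
    ! epoch loop.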

    do n = 1, num_epochs
      do j = 1, num_batches
        start_index = batch_indices(j)
        end_index = min(start_index + batch_size - 1, num_samples)

        do i = start_index, end_index
          call net % forward([x(i)])
          call net % backward([y(i)])
        end do

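        ! Average the gradients accumulated over this mini-batch.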
        call net % update(learning_rate / batch_size)
      end do
    end do
  end subroutine minibatch_gd_optimizer

  subroutine shuffle(arr)
    ! Shuffle an array using the Fisher-Yates algorithm.
    integer, intent(inout) :: arr(:)
    real :: a
    integer :: i, temp

    do i = size(arr), 2, -1
      call random_number(a)
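      ! Map the uniform draw on [0, 1) to a random index in 1..i, then swap
      ! that element with element i.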
      a = floor(a * real(i)) + 1
      temp = arr(i)
      arr(i) = arr(int(a))
      arr(int(a)) = temp
    end do
  end subroutine shuffle

end program quadratic_fit