
Commit 754f2fa

Prototyping RNN layer based on Dense
The dimensions don't match, but let's start with something that compiles.
1 parent 118f795 commit 754f2fa

File tree

3 files changed: +276 -0 lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -59,6 +59,8 @@ add_library(neural-fortran
 src/nf/nf_random.f90
 src/nf/nf_reshape_layer.f90
 src/nf/nf_reshape_layer_submodule.f90
+src/nf/nf_rnn_layer.f90
+src/nf/nf_rnn_layer_submodule.f90
 src/nf/io/nf_io_binary.f90
 src/nf/io/nf_io_binary_submodule.f90
 src/nf/io/nf_io_hdf5.f90

src/nf/nf_rnn_layer.f90

Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
module nf_rnn_layer

  !! This module provides the concrete RNN layer type.
  !! It is used internally by the layer type.
  !! It is not intended to be used directly by the user.

  use nf_activation, only: activation_function
  use nf_base_layer, only: base_layer

  implicit none

  private
  public :: rnn_layer

  type, extends(base_layer) :: rnn_layer

    !! Concrete implementation of a recurrent (RNN) layer type

    integer :: input_size
    integer :: output_size

    real, allocatable :: weights(:,:)
    real, allocatable :: recurrent(:,:)
    real, allocatable :: biases(:)
    real, allocatable :: state(:)
    real, allocatable :: z(:) ! matmul(x, w) + b
    real, allocatable :: output(:) ! activation(z)
    real, allocatable :: gradient(:) ! matmul(w, db)
    real, allocatable :: dw(:,:) ! weight gradients
    real, allocatable :: db(:) ! bias gradients

    class(activation_function), allocatable :: activation

  contains

    !procedure :: backward
    !procedure :: forward
    !procedure :: get_gradients
    procedure :: get_num_params
    !procedure :: get_params
    procedure :: init
    !procedure :: set_params

  end type rnn_layer

  interface rnn_layer
    elemental module function rnn_layer_cons(output_size, activation) &
      result(res)
      !! This function returns the `rnn_layer` instance.
      integer, intent(in) :: output_size
        !! Number of neurons in this layer
      class(activation_function), intent(in) :: activation
        !! Instance of the activation_function to use;
        !! see nf_activation.f90 for available functions.
      type(rnn_layer) :: res
        !! rnn_layer instance
    end function rnn_layer_cons
  end interface rnn_layer

  interface

    pure module subroutine backward(self, input, gradient)
      !! Apply the backward gradient descent pass.
      !! Only weight and bias gradients are updated in this subroutine,
      !! while the weights and biases themselves are untouched.
      class(rnn_layer), intent(in out) :: self
        !! RNN layer instance
      real, intent(in) :: input(:)
        !! Input from the previous layer
      real, intent(in) :: gradient(:)
        !! Gradient from the next layer
    end subroutine backward

    pure module subroutine forward(self, input)
      !! Propagate forward the layer.
      !! Calling this subroutine updates the values of a few data components
      !! of `rnn_layer` that are needed for the backward pass.
      class(rnn_layer), intent(in out) :: self
        !! RNN layer instance
      real, intent(in) :: input(:)
        !! Input from the previous layer
    end subroutine forward

    pure module function get_num_params(self) result(num_params)
      !! Return the number of parameters in this layer.
      class(rnn_layer), intent(in) :: self
        !! RNN layer instance
      integer :: num_params
        !! Number of parameters in this layer
    end function get_num_params

    pure module function get_params(self) result(params)
      !! Return the parameters (weights and biases) of this layer.
      !! The parameters are ordered as weights first, biases second.
      class(rnn_layer), intent(in) :: self
        !! RNN layer instance
      real, allocatable :: params(:)
        !! Parameters of this layer
    end function get_params

    pure module function get_gradients(self) result(gradients)
      !! Return the gradients of this layer.
      !! The gradients are ordered as weights first, biases second.
      class(rnn_layer), intent(in) :: self
        !! RNN layer instance
      real, allocatable :: gradients(:)
        !! Gradients of this layer
    end function get_gradients

    module subroutine set_params(self, params)
      !! Set the parameters of this layer.
      !! The parameters are ordered as weights first, biases second.
      class(rnn_layer), intent(in out) :: self
        !! RNN layer instance
      real, intent(in) :: params(:)
        !! Parameters of this layer
    end subroutine set_params

    module subroutine init(self, input_shape)
      !! Initialize the layer data structures.
      !!
      !! This is a deferred procedure from the `base_layer` abstract type.
      class(rnn_layer), intent(in out) :: self
        !! RNN layer instance
      integer, intent(in) :: input_shape(:)
        !! Shape of the input layer
    end subroutine init

  end interface

end module nf_rnn_layer
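
For orientation, here is a minimal usage sketch of the constructor and init interfaces above, not part of this commit. It assumes a `sigmoid` activation type is available from nf_activation and that the layer is used standalone rather than through the network front end:

program rnn_layer_sketch
  ! Hypothetical usage sketch; sigmoid is an assumed activation type from nf_activation.
  use nf_rnn_layer, only: rnn_layer
  use nf_activation, only: sigmoid
  implicit none
  type(rnn_layer) :: rnn

  ! Construct a layer with 4 neurons, then initialize it for inputs of size 3.
  rnn = rnn_layer(output_size=4, activation=sigmoid())
  call rnn % init([3])

  ! With the current parameter count: 3*4 weights + 4 biases = 16
  print *, rnn % get_num_params()
end program rnn_layer_sketch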

src/nf/nf_rnn_layer_submodule.f90

Lines changed: 143 additions & 0 deletions
@@ -0,0 +1,143 @@
submodule(nf_rnn_layer) nf_rnn_layer_submodule

  use nf_activation, only: activation_function
  use nf_base_layer, only: base_layer
  use nf_random, only: random_normal

  implicit none

contains

  elemental module function rnn_layer_cons(output_size, activation) &
    result(res)
    integer, intent(in) :: output_size
    class(activation_function), intent(in) :: activation
    type(rnn_layer) :: res

    res % output_size = output_size
    res % activation_name = activation % get_name()
    allocate( res % activation, source = activation )

  end function rnn_layer_cons


  pure module subroutine backward(self, input, gradient)
    class(rnn_layer), intent(in out) :: self
    real, intent(in) :: input(:)
    real, intent(in) :: gradient(:)
    real :: db(self % output_size)
    real :: dw(self % input_size, self % output_size)

    db = gradient * self % activation % eval_prime(self % z)
    dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)]))
    self % gradient = matmul(self % weights, db)
    self % dw = self % dw + dw
    self % db = self % db + db

  end subroutine backward


  pure module subroutine forward(self, input)
    class(rnn_layer), intent(in out) :: self
    real, intent(in) :: input(:)

    self % z = matmul(input, self % weights) + self % biases
    self % output = self % activation % eval(self % z)

  end subroutine forward


  pure module function get_num_params(self) result(num_params)
    class(rnn_layer), intent(in) :: self
    integer :: num_params

    ! Number of weights plus number of biases
    num_params = self % input_size * self % output_size + self % output_size

  end function get_num_params


  pure module function get_params(self) result(params)
    class(rnn_layer), intent(in) :: self
    real, allocatable :: params(:)

    params = [ &
      pack(self % weights, .true.), &
      pack(self % biases, .true.) &
    ]

  end function get_params


  pure module function get_gradients(self) result(gradients)
    class(rnn_layer), intent(in) :: self
    real, allocatable :: gradients(:)

    gradients = [ &
      pack(self % dw, .true.), &
      pack(self % db, .true.) &
    ]

  end function get_gradients


  module subroutine set_params(self, params)
    class(rnn_layer), intent(in out) :: self
    real, intent(in) :: params(:)

    ! Check that the number of parameters is correct.
    if (size(params) /= self % get_num_params()) then
      error stop 'Error: number of parameters does not match'
    end if

    ! Reshape the weights.
    self % weights = reshape( &
      params(:self % input_size * self % output_size), &
      [self % input_size, self % output_size] &
    )

    ! Reshape the biases.
    self % biases = reshape( &
      params(self % input_size * self % output_size + 1:), &
      [self % output_size] &
    )

  end subroutine set_params


  module subroutine init(self, input_shape)
    class(rnn_layer), intent(in out) :: self
    integer, intent(in) :: input_shape(:)

    self % input_size = input_shape(1)

    ! Weights are a 2-d array of shape previous layer size
    ! times this layer size.
    allocate(self % weights(self % input_size, self % output_size))
    call random_normal(self % weights)
    self % weights = self % weights / self % input_size

    ! Broadcast weights to all other images, if any.
    call co_broadcast(self % weights, 1)

    allocate(self % biases(self % output_size))
    self % biases = 0

    allocate(self % output(self % output_size))
    self % output = 0

    allocate(self % z(self % output_size))
    self % z = 0

    allocate(self % dw(self % input_size, self % output_size))
    self % dw = 0

    allocate(self % db(self % output_size))
    self % db = 0

    allocate(self % gradient(self % output_size))
    self % gradient = 0

  end subroutine init

end submodule nf_rnn_layer_submodule
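
As the commit message notes, `forward` here is still the dense-layer rule: the `recurrent` and `state` components declared in the type are not yet used, allocated in `init`, or counted by `get_num_params`. Purely as an illustration of where this prototype seems to be heading, a sketch of an Elman-style step is shown below, assuming `recurrent` gets allocated with shape (output_size, output_size) and `state` with size output_size; this is not part of the commit:

  ! Illustrative only: an Elman-style forward step using the declared
  ! recurrent weights and hidden state (h_t = activation(x_t W + h_{t-1} R + b)).
  pure module subroutine forward(self, input)
    class(rnn_layer), intent(in out) :: self
    real, intent(in) :: input(:)

    self % z = matmul(input, self % weights) &
             + matmul(self % state, self % recurrent) &
             + self % biases
    self % state = self % activation % eval(self % z)
    self % output = self % state

  end subroutine forward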
