This repository was archived by the owner on Jul 1, 2023. It is now read-only.

Commit 3e8d86e

Reduce explicit types required when instantiating the SGD optimizer. (#28)
Previously, in order to instantiate an optimizer, you had to call `type(of: model)` and pass the result into the optimizer's initializer so that type inference could pick the right type for `Model`. This could be confusing for new users. This commit proposes an alternate way to write this:

```swift
let optimizer = SGD(for: model, learningRate: 0.01, scalarType: Float.self)
```

This formulation is clear and readable, and it avoids spelling out any generic argument types. By annotating the model parameter as `__shared`, we ensure that we don't pay the cost of a model copy (which could eventually become very expensive).
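As a minimal, standalone illustration of the inference trick the commit relies on (`Box` and `MyModel` are hypothetical names, not code from this repository): passing a value binds a generic parameter that a defaulted `Model.Type` argument never could, so the caller spells out no generics at all.

```swift
// A generic type whose parameter is bound by a value the caller already has.
struct Box<T> {
    // The argument is used only for its type; it is never stored.
    init(for _: T) {}
}

struct MyModel {}

let model = MyModel()
let box = Box(for: model)   // T is inferred as MyModel; no `Box<MyModel>` spelling needed.
```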
1 parent: 45c2bcd

3 files changed (+9 lines, −9 lines)

Sources/DeepLearning/Optimizer.swift

Lines changed: 7 additions & 7 deletions
```diff
@@ -35,13 +35,13 @@ public class Adam<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
     public let decay: Scalar
 
     public init(
+        for _: __shared Model,
         learningRate: Scalar = 1e-3,
         beta1: Scalar = 0.9,
         beta2: Scalar = 0.999,
         epsilon: Scalar = 1e-8,
         decay: Scalar = 0,
-        modelType: Model.Type = Model.self,
-        scalarType: Scalar.Type = Scalar.self
+        scalarType: Scalar.Type
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
         precondition(0 <= beta1 && beta1 <= 1, "Beta parameter must be between 0 and 1")
@@ -84,12 +84,12 @@ public class RMSProp<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
     public let decay: Scalar
 
     public init(
+        for _: __shared Model,
         learningRate: Scalar = 0.001,
         rho: Scalar = 0.9,
         epsilon: Scalar = 1e-8,
         decay: Scalar = 0,
-        modelType: Model.Type = Model.self,
-        scalarType: Scalar.Type = Scalar.self
+        scalarType: Scalar.Type
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
         precondition(rho >= 0, "Rho must be non-negative")
@@ -125,12 +125,12 @@ public class SGD<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
     public let nesterov: Bool
 
     public init(
+        for _: __shared Model,
         learningRate: Scalar = 0.01,
         momentum: Scalar = 0,
         decay: Scalar = 0,
         nesterov: Bool = false,
-        modelType: Model.Type = Model.self,
-        scalarType: Scalar.Type = Scalar.self
+        scalarType: Scalar.Type
     ) {
         precondition(learningRate >= 0, "Learning rate must be non-negative")
         precondition(momentum >= 0, "Momentum must be non-negative")
@@ -171,7 +171,7 @@ public class RiemannSGD<Model: Layer, Scalar: FloatingPoint>: Optimizer
     public init(
         learningRate: Scalar,
         modelType: Model.Type = Model.self,
-        scalarType: Scalar.Type = Scalar.self
+        scalarType: Scalar.Type
     ) {
         self.learningRate = learningRate
     }
```
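For readers skimming the diff, here is a self-contained sketch of the resulting initializer shape. `ToyOptimizer` and `TinyModel` are hypothetical stand-ins, not the library's actual types; the point is how the `__shared` model parameter and the non-defaulted `scalarType` interact.

```swift
// Hypothetical mirror of the pattern applied above; not the library's real class.
class ToyOptimizer<Model, Scalar: BinaryFloatingPoint> {
    let learningRate: Scalar

    init(
        for _: __shared Model,      // borrowed: binds `Model` without copying the value
        learningRate: Scalar = 0.01,
        scalarType: Scalar.Type     // no default, so the call site must anchor `Scalar`
    ) {
        precondition(learningRate >= 0, "Learning rate must be non-negative")
        self.learningRate = learningRate
    }
}

struct TinyModel {}

let optimizer = ToyOptimizer(for: TinyModel(), scalarType: Float.self)
// `Model` == TinyModel and `Scalar` == Float, with no generic arguments spelled out.
```

Dropping the `= Scalar.self` default is what forces the call site to pass `Float.self`; with the default in place, nothing anchored `Scalar`, which is why the old API needed explicit generic arguments or `type(of:)`.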

Tests/DeepLearningTests/SequentialTests.swift

Lines changed: 1 addition & 1 deletion
```diff
@@ -27,7 +27,7 @@ final class SequentialTests: XCTestCase {
             }
         }
         var model = Model()
-        let optimizer = SGD(learningRate: 0.02, modelType: type(of: model), scalarType: Float.self)
+        let optimizer = SGD(for: model, learningRate: 0.02, scalarType: Float.self)
         let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
         let y: Tensor<Float> = [0, 1, 1, 0]
         let context = Context(learningPhase: .training)
```
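Note that the updated call site spells out no generic arguments at all: `model` binds `Model` by value and `Float.self` binds `Scalar`, which is exactly the readability win described in the commit message.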

Tests/DeepLearningTests/TrivialModelTests.swift

Lines changed: 1 addition & 1 deletion
```diff
@@ -40,8 +40,8 @@ final class TrivialModelTests: XCTestCase {
                 return l2.applied(to: h1, in: context)
             }
         }
-        let optimizer = SGD<Classifier, Float>(learningRate: 0.02)
         var classifier = Classifier(hiddenSize: 4)
+        let optimizer = SGD(for: classifier, learningRate: 0.02, scalarType: Float.self)
         let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
         let y: Tensor<Float> = [[0], [1], [1], [0]]
```
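Also note the reordering here: because the new initializer takes the model value rather than its type, `classifier` must exist before the optimizer can be constructed, so the two declarations swap places.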
