
Commit 9b3b609

Add a helper function for sequential models. (#20)
Many deep learning models are composed of sequential layers stacked one on top of the other. It can be tedious to write out the explicit `applied(to:)` function for such models, because it's repetitive and it obscures the underlying intent. It's especially bothersome because it's the second (or third) time you're writing out all the layers: the first time is declaring the instance variables, and the second (if necessary) is in the initializer. Fortunately, with helper functions, we can make everything type safe as well as convenient, easily expressible, and readable!

This commit adds a family of `sequenced(in:through:)` functions that take in a context, an input, and a variable number of layers, and chain the output of each layer into the input of the next. (A usage sketch is shown below.)

This API approach has a number of advantages:

1. It avoids introducing new symbolic operators, which can be very confusing to new users.
2. It works with today's AutoDiff implementation. (Yay!)
3. It is very readable and clean.
4. It avoids users "getting stuck". Concretely, if someone implemented a model using my previously proposed `>>>` operator and then wanted to add a residual (or skip) connection, they would have to basically re-write their whole model using a struct, etc. With this API structure, only "local" changes are required. (e.g. If only one skip-connection is required, they can split the sequential chain into two pieces.)

Downsides of this approach:

1. It doesn't DRY-out the types required to define a model. (I have some thoughts here, but there isn't enough room in this margin^H^H^H^H^H^Hcommit message.)
2. We should think hard about how things should look when we have loops.
3. We should switch to gyb to generate the code for all the different arities.
1 parent 6add73c commit 9b3b609
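
As a quick illustration of what the helper buys, here is a sketch of a three-layer model. The `MLP` model and its layer sizes are hypothetical, not part of this commit; the commented-out lines show the manual chain that the single `sequenced(in:through:)` call replaces:

struct MLP: Layer {
    var dense1 = Dense<Float>(inputSize: 4, outputSize: 8, activation: relu)
    var dense2 = Dense<Float>(inputSize: 8, outputSize: 8, activation: relu)
    var dense3 = Dense<Float>(inputSize: 8, outputSize: 1, activation: relu)

    @differentiable(wrt: (self, input))
    func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
        // The manual chain this helper replaces:
        //   let o1 = dense1.applied(to: input, in: context)
        //   let o2 = dense2.applied(to: o1, in: context)
        //   return dense3.applied(to: o2, in: context)
        return input.sequenced(in: context, through: dense1, dense2, dense3)
    }
}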

File tree (3 files changed: +122, -0 lines)

Sources/DeepLearning/Layer.swift
Tests/DeepLearningTests/SequentialTests.swift
Tests/DeepLearningTests/XCTestManifests.swift

Sources/DeepLearning/Layer.swift

Lines changed: 74 additions & 0 deletions
@@ -102,6 +102,80 @@ public extension Layer {
     }
 }
 
+/// Adds helpers for standard feed-forward, sequential models.
+public extension Differentiable {
+    @differentiable(wrt: (self, l1, l2))
+    func sequenced<L1: Layer, L2: Layer>(
+        in context: Context, through l1: L1, _ l2: L2)
+        -> L2.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input {
+        let o1 = l1.applied(to: self, in: context)
+        return l2.applied(to: o1, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3))
+    func sequenced<L1: Layer, L2: Layer, L3: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3)
+        -> L3.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        return l3.applied(to: o2, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3, l4))
+    func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4)
+        -> L4.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input,
+              L3.Output == L4.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        let o3 = l3.applied(to: o2, in: context)
+        return l4.applied(to: o3, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3, l4, l5))
+    func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5)
+        -> L5.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input,
+              L3.Output == L4.Input,
+              L4.Output == L5.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        let o3 = l3.applied(to: o2, in: context)
+        let o4 = l4.applied(to: o3, in: context)
+        return l5.applied(to: o4, in: context)
+    }
+
+    @differentiable(wrt: (self, l1, l2, l3, l4, l5, l6))
+    func sequenced<L1: Layer, L2: Layer, L3: Layer, L4: Layer, L5: Layer, L6: Layer>(
+        in context: Context, through l1: L1, _ l2: L2, _ l3: L3, _ l4: L4, _ l5: L5, _ l6: L6)
+        -> L6.Output
+        where L1.Input == Self,
+              L1.Output == L2.Input,
+              L2.Output == L3.Input,
+              L3.Output == L4.Input,
+              L4.Output == L5.Input,
+              L5.Output == L6.Input {
+        let o1 = l1.applied(to: self, in: context)
+        let o2 = l2.applied(to: o1, in: context)
+        let o3 = l3.applied(to: o2, in: context)
+        let o4 = l4.applied(to: o3, in: context)
+        let o5 = l5.applied(to: o4, in: context)
+        return l6.applied(to: o5, in: context)
+    }
+}
+
+
 /// A mutable, shareable, owning reference to a tensor.
 public final class Parameter<Scalar: TensorFlowScalar> {
     public var value: Tensor<Scalar>
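
To make advantage 4 of the commit message concrete, here is a sketch of a residual (skip) connection expressed by splitting one sequential chain into two `sequenced` calls. The `ResidualBlock` model and its layer sizes are hypothetical, not part of this commit; only the shape of the change matters:

struct ResidualBlock: Layer {
    var dense1 = Dense<Float>(inputSize: 8, outputSize: 8, activation: relu)
    var dense2 = Dense<Float>(inputSize: 8, outputSize: 8, activation: relu)
    var dense3 = Dense<Float>(inputSize: 8, outputSize: 8, activation: relu)
    var dense4 = Dense<Float>(inputSize: 8, outputSize: 8, activation: relu)

    @differentiable(wrt: (self, input))
    func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
        // First piece of the chain.
        let hidden = input.sequenced(in: context, through: dense1, dense2)
        // The skip connection is a purely "local" change; the second
        // piece of the chain continues from the sum.
        return (hidden + input).sequenced(in: context, through: dense3, dense4)
    }
}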
Tests/DeepLearningTests/SequentialTests.swift

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+// Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import XCTest
+@testable import DeepLearning
+
+final class SequentialTests: XCTestCase {
+    func testSequential() {
+        struct Model: Layer {
+            var dense1 = Dense<Float>(inputSize: 2, outputSize: 4, activation: relu)
+            var dense2 = Dense<Float>(inputSize: 4, outputSize: 1, activation: relu)
+
+            @differentiable(wrt: (self, input))
+            func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
+                return input.sequenced(in: context, through: dense1, dense2)
+            }
+        }
+        var model = Model()
+        let optimizer = SGD(learningRate: 0.02, modelType: type(of: model), scalarType: Float.self)
+        let x: Tensor<Float> = [[0, 0], [0, 1], [1, 0], [1, 1]]
+        let y: Tensor<Float> = [0, 1, 1, 0]
+        let context = Context(learningPhase: .training)
+        for _ in 0..<1000 {
+            let 𝛁model = model.gradient { model -> Tensor<Float> in
+                let ŷ = model.applied(to: x, in: context)
+                return meanSquaredError(predicted: ŷ, expected: y)
+            }
+            optimizer.update(&model.allDifferentiableVariables, along: 𝛁model)
+        }
+        print(model.inferring(from: [[0, 0], [0, 1], [1, 0], [1, 1]]))
+    }
+
+    static var allTests = [
+        ("testSequential", testSequential)
+    ]
+}

Tests/DeepLearningTests/XCTestManifests.swift

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@ public func allTests() -> [XCTestCaseEntry] {
     return [
         testCase(PRNGTests.allTests),
         testCase(TrivialModelTests.allTests),
+        testCase(SequentialTests.allTests),
     ]
 }
 #endif
