 from morph.layers.sparse import percent_waste
-from morph._utils import check, round
-from morph.nn.utils import in_dim, out_dim
+from morph.utils import check, round
+from .resizing import Resizing
+from .utils import in_dim, out_dim, group_layers_by_algo
+from .widen import resize_layers
+from ._types import type_name
+
+from typing import List
 
 import torch.nn as nn
 
 
-def calc_reduced_size(layer: nn.Module) -> (int, int):
-    """Calculates the reduced size of the layer, post training (initial or morphed re-training)
-    so the layers can be resized.
+class Shrinkage:
+    """
+    An intermediary for the "Shrink" step of the three-step Morphing algorithm.
+    Rather than leaving all of this state loose in the scope of one mega-function,
+    these abstractions ease the implementation of the shrinking and pruning of the
+    network.
+    * Given that we have access to the total count of nodes, and to how wasteful a
+      layer was, we can deduce any necessary changes once given a new input dimension.
+    * We expect input dimensions to change to accommodate the trimmed-down earlier
+      layers, but we want an expansion further along to allow the opening of
+      bottlenecks in the architecture.
+    """
+
+    def __init__(self, input_dimension: int, initial_parameters: int,
+                 waste_percentage: float):
+        self.input_dimension = input_dimension  # TODO: is this relevant in any non-Linear case?
+        self.initial_parameters = initial_parameters
+        self.waste_percentage = waste_percentage
+        self.reduced_parameters = Shrinkage.reduce_parameters(initial_parameters,
+                                                              waste_percentage)
+
+    @staticmethod
+    def reduce_parameters(initial_parameters: int, waste: float) -> int:
+        """Calculates the new, smaller number of parameters that this instance encapsulates"""
+        percent_keep = (1. - waste)
+        unrounded_params_to_keep = percent_keep * initial_parameters
+        # round the decimal up to the nearest integer
+        return round(unrounded_params_to_keep)
+
+
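A worked example of the arithmetic above, with made-up numbers; it assumes `morph.utils.round` rounds up, as the comment suggests:

```python
# a 64-in, 64-out Linear layer carries 64 * 64 = 4096 weights
shrinkage = Shrinkage(input_dimension=64, initial_parameters=4096,
                      waste_percentage=0.3)
# keeping 70% of 4096 weights: 2867.2 rounds up to 2868
assert shrinkage.reduced_parameters == round(0.7 * 4096)
```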
+def shrink_to_resize(shrinkage: Shrinkage, new_input_dimension: int) -> Resizing:
+    """Given the `new_input_dimension`, calculate a reshaping/resizing for the parameters
+    of the supplied `shrinkage`.
+    We round the new output dimension up, generously allowing for the opening of bottlenecks.
+    Any waste this rounding introduces is pruned on later iterations. (Needs proof/unit test)
     """
-    # TODO: remove this guard when properly we protect access to this function
-    check(
-        type(layer) == nn.Conv2d or type(layer) == nn.Linear,
-        'Invalid layer type: ' + type(layer))
+    new_output_dimension = round(shrinkage.reduced_parameters / new_input_dimension)
+    return Resizing(new_input_dimension, new_output_dimension)
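Continuing that example, suppose the upstream layer shrank from 64 to 48 outputs; the 2868 kept weights are then spread over the 48 inputs:

```python
resized = shrink_to_resize(shrinkage, new_input_dimension=48)
# 2868 / 48 = 59.75, which rounds up: roughly Resizing(48, 60)
```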
+
+
+#################### proof of a good implementation ####################
+
+
+def uniform_prune(net: nn.Module) -> nn.Module:
+    """Shrink every layer of the network down to 70% of its width. The network's
+    input and output dimensions are not altered."""
+    return resize_layers(net, width_factor=0.7)
+
+
+#################### the algorithm to end all algorithms ####################
+
+
+def shrink_layer(layer: nn.Module) -> Shrinkage:
+    waste = percent_waste(layer)
+    parameter_count = layer.weight.numel()  # the count is already tracked for us
+    return Shrinkage(in_dim(layer), parameter_count, waste)
+
+
+def fit_layer_sizes(layer_sizes: List[Shrinkage]) -> List[Resizing]:
+    # TODO: where's the invocation site for shrink_to_resize
+    pass
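A minimal sketch of the chaining this TODO hints at, under two assumptions: the first layer keeps its current input dimension, and `Resizing` exposes an `output_dimension` attribute (a hypothetical name for its second constructor argument):

```python
def fit_layer_sizes_sketch(layer_sizes: List[Shrinkage]) -> List[Resizing]:
    # the first layer keeps its input dimension; every later layer inherits
    # the (possibly smaller) output dimension of the layer before it
    fitted = [shrink_to_resize(layer_sizes[0], layer_sizes[0].input_dimension)]
    for shrinkage in layer_sizes[1:]:
        fitted.append(shrink_to_resize(shrinkage, fitted[-1].output_dimension))
    return fitted
```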
+
+
+def transform(original_layer: nn.Module, new_shape: Resizing) -> nn.Module:
+    # TODO: this might just be utils.redo_layer, without the primitive obsession
+    pass
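If `utils.redo_layer` doesn't pan out, one plausible shape for this function, assuming `Resizing` carries `input_dimension`/`output_dimension` fields (hypothetical names) and handling only the two layer types the removed guard allowed:

```python
def transform_sketch(original_layer: nn.Module, new_shape: Resizing) -> nn.Module:
    # hypothetical: rebuild the layer at its fitted size (weights re-initialized)
    if isinstance(original_layer, nn.Linear):
        return nn.Linear(new_shape.input_dimension, new_shape.output_dimension)
    if isinstance(original_layer, nn.Conv2d):
        return nn.Conv2d(new_shape.input_dimension, new_shape.output_dimension,
                         kernel_size=original_layer.kernel_size)
    raise TypeError('Invalid layer type: ' + type_name(original_layer))
```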
+
+
+def shrink_prune_fit(net: nn.Module) -> nn.Module:
+    first, middle_layers, last = group_layers_by_algo(net)
+    shrunk = {
+        "first": shrink_layer(first),
+        "middle": [shrink_layer(m) for m in middle_layers],
+        "last": shrink_layer(last)
+    }
+
+    # FIXME: why doesn't the linter like `fitted_layers`
+    fitted_layers = fit_layer_sizes([shrunk["first"], *shrunk["middle"], shrunk["last"]])
+
+    # iteration very similar to `resize_layers`, but matches each fitted Resizing
+    # with its corresponding layer
+    new_first, new_middle_layers, new_last = group_layers_by_algo(fitted_layers)
+
+    new_net = nn.Module()
+
+    new_net.add_module(type_name(first), transform(first, new_first))
+
+    for old, new in zip(middle_layers, new_middle_layers):
+        new_net.add_module(type_name(old), transform(old, new))
 
-    percent_keep = 1 - percent_waste(layer)
-    shrunk_in, shrunk_out = percent_keep * in_dim(layer), percent_keep * out_dim(layer)
+    new_net.add_module(type_name(last), transform(last, new_last))
 
-    return round(shrunk_in), round(shrunk_out)
+    return new_net
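Taken together, the intended call pattern is a train-then-shrink cycle. A hedged usage sketch (the Sequential is an arbitrary stand-in, and it assumes `group_layers_by_algo` accepts such a container):

```python
net = nn.Sequential(nn.Linear(784, 128), nn.Linear(128, 64), nn.Linear(64, 10))
# ... train `net` first, so percent_waste has sparsity statistics to measure ...
smaller_net = shrink_prune_fit(net)
```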