import torch
import torch.nn as nn

-from ..nn.utils import layer_has_bias
+from ..nn.utils import layer_has_bias, redo_layer
+from .._utils import check, round


# NOTE: should factor be {smaller, default at all}?
-# TODO: Research - is there a better type for layer than nn.Module?
def widen(layer: nn.Module, factor=1.4, in_place=False) -> nn.Module:
    """
    Args:
@@ -23,23 +23,18 @@ def widen(layer: nn.Module, factor=1.4, in_place=False) -> nn.Module:
    Returns:
        A new layer of the base type (e.g. nn.Linear) or `None` if in_place=True
    """
-    if factor < 1.0:
-        raise ValueError('Cannot shrink with the widen() function')
-    if factor == 1.0:
-        raise ValueError("You shouldn't waste compute time if you're not changing anything")
+    check(factor > 1.0, "Your call to widen() should be increasing the size of your layers")
    # we know that layer.weight.size()[0] is the __output__ dimension in the linear case
    output_dim = 0
    if isinstance(layer, nn.Linear):
        output_dim = layer.weight.size()[0]  # FIXME: switch to layer.out_features?
        input_dim = layer.weight.size()[1]  # FIXME: switch to layer.in_features?
-        # TODO: other classes, for robustness?
-        # TODO: Use dictionary look-ups instead, because they're faster?
    else:
        raise ValueError('unsupported layer type:', type(layer))

    logging.debug(f"current dimensions: {(output_dim, input_dim)}")

-    new_size = round(factor * output_dim + .5)  # round up, not down, if we can
+    new_size = round(factor * output_dim)  # round up, not down, if we can

    # We're increasing layer width from output_dim to new_size, so let's save that for later
    size_diff = new_size - output_dim
@@ -56,20 +51,26 @@ def widen(layer: nn.Module, factor=1.4, in_place=False) -> nn.Module:

    # TODO: cleanup duplication? Missing properties that will affect usability?
    if in_place:
-        layer.out_features = new_size
-        layer.weight = p_weights
-        layer.bias = p_bias
-        logging.warning(
-            'Using experimental "in-place" version. May have unexpected affects on activation.'
-        )
+        write_layer_properties(layer, new_size, p_weights, p_bias)
        return layer
    else:
-        print(f"New shape = {expanded_weights.shape}")
-        l = nn.Linear(*expanded_weights.shape[::-1], bias=utils.layer_has_bias(layer))
-        l.weight = p_weights
-        l.bias = p_bias
+        logging.debug(f"New shape = {expanded_weights.shape}")
+        new_input, new_output = expanded_weights.shape[1], expanded_weights.shape[0]
+        l = redo_layer(layer, new_in=new_input, new_out=new_output)
+        write_layer_properties(l, new_size=None, new_weights=p_weights, new_bias=p_bias)
+
        return l

+def write_layer_properties(layer, new_size, new_weights, new_bias):
+    """Assigns properties to this `layer`, making the changes on the model in place
+    """
+    if new_size is not None: layer.out_features = new_size
+    if new_weights is not None: layer.weight = new_weights
+    if new_bias is not None: layer.bias = new_bias
+    logging.warning(
+        'Using experimental "in-place" version. May have unexpected effects on activation.'
+    )
+

def _expand_bias_or_weight(t: nn.Parameter, increase: int) -> torch.Tensor:
7576 """Returns a tensor of shape `t`, with padding values drawn from a Guassian distribution
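For a quick sanity check of the non-in-place path, here is an illustrative call to widen(); the layer sizes and the factor are arbitrary, and the expected shapes assume the round-up behavior sketched above.

# Illustrative usage of widen(); the sizes and factor are arbitrary.
import torch.nn as nn

layer = nn.Linear(16, 32)             # in_features=16, out_features=32
wider = widen(layer, factor=1.5)      # returns a new, wider nn.Linear

print(wider.weight.shape)             # expected: torch.Size([48, 16]), since 32 * 1.5 = 48
print(wider.bias is not None)         # True: the original layer had a bias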