Merge pull request #9 from EIDOSLAB/v1.1.4

AndreaBrg · web-flow · commit 16702e54e435 · 2022-11-11T14:34:31.000+01:00
V1.1.4
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,3 @@
 setuptools==45.2.0
-torch==1.11.0
-torchvision==0.12.0
+torch==1.12.0
+torchvision==0.13.0
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 HERE = pathlib.Path(__file__).parent
 README = (HERE / "README.md").read_text()
 
-__version__ = "1.1.3"
+__version__ = "1.1.4"
 
 setup(
     name='torch-simplify',
diff --git a/simplify/layers.py b/simplify/layers.py
@@ -5,6 +5,29 @@
 import torch.nn as nn
 
 
+class LinearExpand(nn.Linear):
+    @staticmethod
+    def from_linear(module: nn.Linear, idxs: torch.Tensor, bias):
+        module.__class__ = LinearExpand
+
+        module.register_parameter('bf', torch.nn.Parameter(bias.clone()))
+        module.bf[idxs] = 0
+
+        module.register_buffer('idxs', idxs.to(module.weight.device))
+        module.register_buffer('zeros', torch.zeros(bias.shape, dtype=bias.dtype, device=module.weight.device))
+
+        setattr(module, 'idxs_cache', module.idxs)
+        setattr(module, 'zero_cache', module.zeros)
+
+        return module
+
+    def forward(self, x):
+        x = super().forward(x)
+
+        expanded = torch.scatter(self.zeros, 0, self.idxs, x)
+        return expanded + self.bf
+
+
 class ConvB(nn.Conv2d):
     @staticmethod
     def from_conv(module: nn.Conv2d, bias):
@@ -98,4 +121,5 @@ def forward(self, x):
         return expanded + self.bf[:, None, None].expand_as(expanded)
 
     def __repr__(self):
-        return f'BatchNormExpand({self.num_features}, eps={self.eps}, momentum={self.momentum}, affine={self.affine}, track_running_stats={self.track_running_stats})'
+        return f'BatchNormExpand({self.num_features}, eps={self.eps}, momentum={self.momentum}, ' \
+               f'affine={self.affine}, track_running_stats={self.track_running_stats})'
diff --git a/simplify/propagate.py b/simplify/propagate.py
@@ -18,7 +18,8 @@ def propagate_bias(model: nn.Module, x: torch.Tensor, pinned_out: List) -> nn.Mo
     Args:
         model (nn.Module):
         x (torch.Tensor): `model`'s input of shape [1, C, N, M], same as the model usual input.
-        pinned_out (List): List of `nn.Modules` which output needs to remain of the original shape (e.g. layers related to a residual connection with a sum operation).
+        pinned_out (List): List of `nn.Modules` whose output needs to keep its original shape
+        (e.g. layers related to a residual connection with a sum operation).
 
     Returns:
         nn.Module: Model with propagated bias.
@@ -36,7 +37,7 @@ def __remove_nan(module, input):
         return input
 
     @torch.no_grad()
-    def __propagate_biases_hook(module, input, output):
+    def __propagate_biases_hook(module, input, output, name=None):
         """
         PyTorch hook used to propagate the biases of pruned neurons to following non-pruned layers.
         """
@@ -47,7 +48,14 @@ def __propagate_biases_hook(module, input, output):
 
         bias_feature_maps = output[0].clone()
 
-        if isinstance(module, nn.Conv2d):
+        if isinstance(module, nn.Linear):
+            # TODO: handle missing bias
+            # For a linear layer, we can just update the scalar bias values
+            # if getattr(module, 'bias', None) is not None:
+            #    module.bias.data = bias_feature_maps
+            module.register_parameter('bias', nn.Parameter(bias_feature_maps))
+
+        elif isinstance(module, nn.Conv2d):
             # For a conv layer, we remove the scalar biases
             # and use bias matrices (ConvB)
             if bias_feature_maps.abs().sum() != 0.:
@@ -107,13 +115,6 @@ def __propagate_biases_hook(module, input, output):
         # if getattr(module, 'bias', None) is not None and module.bias.abs().sum() == 0:
         #     module.register_parameter('bias', None)
 
-        elif isinstance(module, nn.Linear):
-            # TODO: handle missing bias
-            # For a linear layer, we can just update the scalar bias values
-            # if getattr(module, 'bias', None) is not None:
-            #    module.bias.data = bias_feature_maps
-            module.register_parameter('bias', nn.Parameter(bias_feature_maps))
-
         else:
             error('Unsupported module type:', module)
 
@@ -136,8 +137,7 @@ def __propagate_biases_hook(module, input, output):
                 module.bias.data.mul_(~pruned_channels)
 
         elif isinstance(module, nn.Conv2d):
-            output[~pruned_channels[None, :, None,
-                    None].expand_as(output)] *= float('nan')
+            output[~pruned_channels[None, :, None, None].expand_as(output)] *= float('nan')
             if isinstance(module, (ConvB, ConvExpand)):
                 if getattr(module, 'bf', None) is not None:
                     module.bf.data.mul_(~pruned_channels[:, None, None])
@@ -146,8 +146,7 @@ def __propagate_biases_hook(module, input, output):
                     module.bias.data.mul_(~pruned_channels)
 
         if isinstance(module, nn.BatchNorm2d):
-            output[~pruned_channels[None, :, None,
-                    None].expand_as(output)] *= float('nan')
+            output[~pruned_channels[None, :, None, None].expand_as(output)] *= float('nan')
             if isinstance(module, (BatchNormB, BatchNormExpand)):
                 module.bf.data.mul_(~pruned_channels)
             else:
@@ -164,7 +163,7 @@ def __propagate_biases_hook(module, input, output):
         if isinstance(module, (nn.Conv2d, nn.Linear, nn.BatchNorm2d)):
             handle = module.register_forward_pre_hook(__remove_nan)
             handles.append(handle)
-            handle = module.register_forward_hook(lambda m, i, o: __propagate_biases_hook(m, i, o))
+            handle = module.register_forward_hook(lambda m, i, o, n=name: __propagate_biases_hook(m, i, o, n))
             handles.append(handle)
 
     # Propagate biases
diff --git a/simplify/remove.py b/simplify/remove.py
@@ -6,7 +6,7 @@
 import torch
 import torch.nn as nn
 
-from .layers import BatchNormB, ConvExpand, BatchNormExpand
+from .layers import BatchNormB, ConvExpand, BatchNormExpand, LinearExpand
 
 
 @torch.no_grad()
@@ -37,16 +37,16 @@ def __remove_zeroed_channels_hook(module, input, output, name):
         nonzero_idx = ~(input.view(input.shape[0], -1).sum(dim=1) == 0)
         # print('input:', input.shape)
 
-        if isinstance(module, nn.Conv2d):
+        if isinstance(module, nn.Linear):
+            module.weight = nn.Parameter(module.weight[:, nonzero_idx])
+            module.in_features = module.weight.shape[1]
+
+        elif isinstance(module, nn.Conv2d):
             if module.groups == 1:
                 module.weight = nn.Parameter(module.weight[:, nonzero_idx])
                 module.in_channels = module.weight.shape[1]
             # TODO: handle when groups > 1 (if possible)
 
-        elif isinstance(module, nn.Linear):
-            module.weight = nn.Parameter(module.weight[:, nonzero_idx])
-            module.in_features = module.weight.shape[1]
-
         elif isinstance(module, nn.BatchNorm2d):
             module.weight.data.mul_(nonzero_idx)
             module.running_mean.data.mul_(nonzero_idx)
@@ -104,24 +104,27 @@ def __remove_zeroed_channels_hook(module, input, output, name):
             module.running_mean = module.running_mean[nonzero_idx]
             module.running_var = module.running_var[nonzero_idx]
 
-        # 3. If it is a pinned layer, convert it into ConvExpand or BatchNormExpand
+        # 3. If it is a pinned layer, convert it into LinearExpand, ConvExpand or BatchNormExpand
         if name in pinned_out:
             idxs = torch.where(nonzero_idx)[0]
 
+            if isinstance(module, nn.Linear):
+                module = LinearExpand.from_linear(module, idxs, module.bias)
+
             # Keep bias (bf) full size
-            if isinstance(module, nn.Conv2d):
+            elif isinstance(module, nn.Conv2d):
                 module_bf = getattr(module, 'bf', None)
                 if module_bf is None:
                     module_bf = torch.zeros_like(output[0])
 
                 module = ConvExpand.from_conv(module, idxs, module_bf)
 
-            if isinstance(module, BatchNormB):
-                module = BatchNormExpand.from_bn(module, idxs, module.bf, output.shape)
-
             elif isinstance(module, nn.BatchNorm2d):
-                module = BatchNormExpand.from_bn(module, idxs, module.bias, output.shape)
-                module.register_parameter("bias", None)
+                bias = module.bf if isinstance(module, BatchNormB) else module.bias
+                module = BatchNormExpand.from_bn(module, idxs, bias, output.shape)
+
+                if not isinstance(module, BatchNormB):
+                    module.register_parameter("bias", None)
         else:
             if getattr(module, 'bf', None) is not None:
                 module.bf = nn.Parameter(module.bf[nonzero_idx])
diff --git a/simplify/utils.py b/simplify/utils.py
@@ -94,7 +94,8 @@ def get_previous_layer(connections: Dict, module: fx.Node) -> fx.Node:
 
 def get_pinned(model: torch.nn.Module) -> List[str]:
     """
-    Try to find all the modules for which the output shape needs to stay fixed, (e.g. modules involved in residual connections with a sum).
+    Try to find all the modules for which the output shape needs to stay fixed
+    (e.g. modules involved in residual connections with a sum).
 
     Args:
         model (torch.nn.Module): The model on which to perform the research.
diff --git a/test/modules/fuse.py b/test/modules/fuse.py
@@ -26,5 +26,6 @@ def test_arch(arch, x):
         x = im / 255.
 
         for architecture in models:
+            print(f"Testing with {architecture.__name__}")
             with self.subTest(arch=architecture):
                 self.assertTrue(test_arch(architecture, x))
diff --git a/test/modules/propagate.py b/test/modules/propagate.py
@@ -13,6 +13,8 @@ class Test(unittest.TestCase):
     def test(self):
         @torch.no_grad()
         def test_arch(arch, x, fuse_bn):
+            print(f"Fuse: {fuse_bn}")
+
             model = get_model(architecture, arch)
 
             if fuse_bn:
@@ -31,6 +33,8 @@ def test_arch(arch, x, fuse_bn):
         x = im / 255.
 
         for architecture in models:
+            print(f"Testing with {architecture.__name__}")
+
             with self.subTest(arch=architecture, fuse_bn=True):
                 self.assertTrue(test_arch(architecture, x, fuse_bn=True))
 
diff --git a/test/modules/remove.py b/test/modules/remove.py
@@ -13,6 +13,8 @@ class Test(unittest.TestCase):
     def test(self):
         @torch.no_grad()
         def test_arch(arch, x, fuse_bn):
+            print(f"Fuse: {fuse_bn}")
+
             model = get_model(architecture, arch)
 
             if fuse_bn:
@@ -33,6 +35,8 @@ def test_arch(arch, x, fuse_bn):
         x = im / 255.
 
         for architecture in models:
+            print(f"Testing with {architecture.__name__}")
+
             with self.subTest(arch=architecture, fuse_bn=True):
                 self.assertTrue(test_arch(architecture, x, fuse_bn=True))
 
diff --git a/test/utils.py b/test/utils.py
@@ -13,17 +13,17 @@
 
 models = [
     alexnet,
-    vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19, vgg19_bn,
-    resnet18, resnet34, resnet50, resnet101, resnet152,
-    squeezenet1_0, squeezenet1_1,
-    densenet121, densenet161, densenet169, densenet201,
+    vgg11, vgg11_bn,
+    resnet18, resnet50,
+    squeezenet1_0,
+    densenet121,
     inception_v3,
     googlenet,
-    shufflenet_v2_x0_5, shufflenet_v2_x1_0, shufflenet_v2_x1_5, shufflenet_v2_x2_0,
-    mobilenet_v2, mobilenet_v3_small, mobilenet_v3_large,
-    resnext50_32x4d, resnext101_32x8d,
-    wide_resnet50_2, wide_resnet101_2,
-    mnasnet0_5, mnasnet0_75, mnasnet1_0, mnasnet1_3,
+    shufflenet_v2_x0_5,
+    mobilenet_v2, mobilenet_v3_small,
+    resnext50_32x4d,
+    wide_resnet50_2,
+    mnasnet0_5, mnasnet1_0,
     densenet121
 ]
 
@@ -41,6 +41,8 @@ def get_model(architecture, arch):
         if isinstance(model, SqueezeNet) and 'classifier.1' in name:
             continue
 
-        if isinstance(module, nn.Conv2d):
+        if isinstance(module, (nn.Conv2d, nn.BatchNorm2d)):
             prune.random_structured(module, 'weight', amount=0.8, dim=0)
             prune.remove(module, 'weight')
+
+    return model