add Twist

ayasyrev · ayasyrev · commit 84d767354ce5 · 2020-05-09T18:16:33.000+03:00
diff --git a/model_constructor/_nbdev.py b/model_constructor/_nbdev.py
@@ -33,6 +33,12 @@
          "NewResBlock": "81_Net.ipynb",
          "net34": "04_Net.ipynb",
          "net50": "04_Net.ipynb",
+         "nn": "05_Twist.ipynb",
+         "F": "05_Twist.ipynb",
+         "ConvTwist": "05_Twist.ipynb",
+         "ConvLayerTwist": "05_Twist.ipynb",
+         "NewResBlockTwist": "05_Twist.ipynb",
+         "ResBlockTwist": "05_Twist.ipynb",
          "NewConvLayer": "81_Net.ipynb",
          "me": "81_Net.ipynb"}
 
@@ -41,6 +47,7 @@
            "resnet.py",
            "xresnet.py",
            "net.py",
+           "twist.py",
            "tst_net_2.py"]
 
 doc_url = "https://ayasyrev.github.io/model_constructor/"
diff --git a/model_constructor/layers.py b/model_constructor/layers.py
@@ -15,14 +15,15 @@
 
 class ConvLayer(nn.Sequential):
     """Basic conv layers block"""
+    Conv2d = nn.Conv2d
     def __init__(self, ni, nf, ks=3, stride=1,
             act=True,  act_fn=act_fn,
             bn_layer=True, bn_1st=True, zero_bn=False,
             padding=None, bias=False, groups=1, **kwargs):
 
 #         self.act = act
         if padding==None: padding = ks//2
-        layers = [('conv', nn.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]
+        layers = [('conv', self.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]
         act_bn = [('act_fn', act_fn)] if act else []
         if bn_layer:
             bn = nn.BatchNorm2d(nf)
diff --git a/model_constructor/twist.py b/model_constructor/twist.py
@@ -0,0 +1,180 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/05_Twist.ipynb (unless otherwise specified).
+
+__all__ = ['nn', 'F', 'ConvTwist', 'ConvLayerTwist', 'NewResBlockTwist', 'ResBlockTwist']
+
+# Cell
+from functools import partial
+from collections import OrderedDict
+from .layers import *
+from .net import *
+
+# Cell
+import sys, torch
+nn = torch.nn
+F = torch.nn.functional
+
+# Cell
+class ConvTwist(nn.Module):
+    '''Replacement for Conv2d (kernelsize 3x3) with an optional position-dependent "twist" term.
+
+    With `twist=True`, forward returns conv(x) + XX * conv_x(x) + YY * conv_y(x): XX / YY are the
+    column / row indices rescaled to [-1, 1) and conv_y is conv_x rotated by 90 degrees.
+
+    ni, nf: input / output channels. stride, padding, bias, groups: passed to the base nn.Conv2d
+    (groups falls back to 1 unless it divides both ni and nf). twist: enable the extra terms.
+    iters: number of update steps, used only with twist on, ni == nf and stride == 1.
+    permute: run the convolutions in `_conv` with the dense kernel from `full_kernel` (groups=1).
+    ks and init_max are accepted but not used; the kernel is always 3x3.
+    '''
+    def __init__(self, ni, nf,
+                 ks=3, stride=1, padding=1, bias=False,
+                 groups=1, iters=1, init_max=0.7, twist = False, permute=True):
+        super().__init__()
+        self.twist = twist
+        self.permute = permute
+        self.same = ni==nf and stride==1
+        # a grouped conv needs both channel counts divisible by `groups`; otherwise use groups=1
+        if not (ni%groups==0 and nf%groups==0): groups = 1
+        self.conv = nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=padding, bias=bias, groups=groups)
+        if self.twist:
+            # twist coefficients are drawn at the scale of the freshly initialised conv weights
+            std = self.conv.weight.std().item()
+            self.coeff_Ax = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True)
+            self.coeff_Ay = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True)
+        self.iters = iters
+        self.stride = stride
+        self.groups = groups
+        # kept as a plain attribute; no method of this class reads it
+        self.DD = self.derivatives()
+
+    def derivatives(self):
+        '''Return the Sobel stencils D_x, D_y (scaled by 1/10) and the 5x5 products of I + D_x, I + D_y.'''
+        I = torch.Tensor([[0,0,0],[0,1,0],[0,0,0]]).view(1,1,3,3)
+        D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).view(1,1,3,3) / 10
+        D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).view(1,1,3,3) / 10
+        def convolution(K1, K2):
+            # flipping K2 turns conv2d's cross-correlation into a convolution; padding=2 keeps the full 5x5
+            return F.conv2d(K1, K2.flip(2).flip(3), padding=2)
+        D_xx = convolution(I+D_x, I+D_x).view(5,5)
+        D_yy = convolution(I+D_y, I+D_y).view(5,5)
+        D_xy = convolution(I+D_x, I+D_y).view(5,5)
+        return {'x': D_x, 'y': D_y, 'xx': D_xx, 'yy': D_yy, 'xy': D_xy}
+
+    def kernel(self, coeff_x, coeff_y):
+        '''Mix the Sobel x / y stencils with 2-D coefficients, giving a (rows, cols, 3, 3) kernel.'''
+        D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).to(coeff_x.device)
+        D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).to(coeff_x.device)
+        return coeff_x[:,:,None,None] * D_x + coeff_y[:,:,None,None] * D_y
+
+    def full_kernel(self, kernel): # permuting the groups
+        '''Expand a grouped kernel into a dense one in which each group reads a neighbouring group's channels.'''
+        if self.groups==1: return kernel
+        n = self.groups
+        a,b,_,_ = kernel.size()
+        a = a//n
+        KK = torch.zeros((a*n,b*n,3,3)).to(kernel.device)
+        # within each run of four groups, group i takes input block i-1 and the first takes the fourth
+        # NOTE(review): looks like this assumes `groups` is a multiple of 4 - confirm
+        for i in range(n):
+            if i%4==0:
+                KK[a*i:a*(i+1),b*(i+3):b*(i+4)] = kernel[a*i:a*(i+1)]
+            else:
+                KK[a*i:a*(i+1),b*(i-1):b*i] = kernel[a*i:a*(i+1)]
+        return KK
+
+    def _conv(self, inpt, kernel=None):
+        '''Apply `kernel` (default: the base conv weight), densified by full_kernel when `permute` is set.'''
+        # NOTE(review): padding is fixed to 1 here while the base conv uses the constructor's `padding`
+        if kernel is None:
+            kernel = self.conv.weight
+        if not self.permute:
+            return F.conv2d(inpt, kernel, padding=1, stride=self.stride, groups=self.groups)
+        return F.conv2d(inpt, self.full_kernel(kernel), padding=1, stride=self.stride, groups=1)
+
+    def symmetrize(self, conv_wt):
+        '''In place, average each group's channel block with its transpose; no-op unless `self.same`.'''
+        # the result is written through `.data`, so autograd does not record the averaging
+        if self.same:
+            n = conv_wt.size()[1]
+            for i in range(self.groups):
+                conv_wt.data[n*i:n*(i+1)] = (conv_wt[n*i:n*(i+1)] + torch.transpose(conv_wt[n*i:n*(i+1)],0,1)) / 2
+
+    def forward(self, inpt):
+        '''Return the base convolution of `inpt`, plus the coordinate-weighted twist terms when enabled.'''
+        out = self.conv(inpt)
+        if not self.twist:
+            return out
+        _,_,h,w = out.size()
+        # Coordinate ramps in [-1, 1), built with torch so the module does not rely on an `np` name it
+        # never imports. Computed in float64 and then cast, they broadcast over batch and channels.
+        XX = (torch.arange(w, dtype=torch.float64) * 2 / w - 1).view(1,1,1,w).to(device=out.device, dtype=out.dtype)
+        YY = (torch.arange(h, dtype=torch.float64) * 2 / h - 1).view(1,1,h,1).to(device=out.device, dtype=out.dtype)
+        kernel_x = self.kernel(self.coeff_Ax, self.coeff_Ay)
+        self.symmetrize(kernel_x)
+        kernel_y = kernel_x.transpose(2,3).flip(3)  # make conv_y a 90 degree rotation of conv_x
+        out = out + XX * self._conv(inpt, kernel_x) + YY * self._conv(inpt, kernel_y)
+        if self.same and self.iters>1:
+            # take `iters` steps of size 1/iters starting from inpt, then subtract inpt again
+            out = inpt + out / self.iters
+            for _ in range(self.iters-1):
+                out = out + (self._conv(out) + XX * self._conv(out, kernel_x) + YY * self._conv(out, kernel_y)) / self.iters
+            out = out - inpt
+        return out
+
+    def extra_repr(self):
+        '''Show the twist / permute / same flags in the module repr.'''
+        return f"twist: {self.twist}, permute: {self.permute}, same: {self.same}"
+
+# Cell
+class ConvLayerTwist(ConvLayer): # ConvLayer whose 'conv' layer is built with ConvTwist instead of nn.Conv2d
+    Conv2d = ConvTwist
+
+# Cell
+class NewResBlockTwist(nn.Module):
+    """Residual block: input is pooled first when stride != 1; the bottleneck 3x3 layer is a ConvLayerTwist."""
+    def __init__(self, expansion, ni, nh, stride=1,
+                 conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True,
+                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, zero_bn=True):
+        super().__init__()
+        nf, ni = nh * expansion, ni * expansion
+        self.reduce = pool if stride != 1 else noop  # applied once, before both branches
+        if expansion == 1:  # basic block: two plain 3x3 layers, no twist
+            layers = [("conv_0", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
+                      ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))]
+        else:  # bottleneck: 1x1 -> 3x3 twist -> 1x1
+            layers = [("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
+                      ("conv_1_twist", ConvLayerTwist(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
+                      ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))]
+        if sa:
+            layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))
+        self.convs = nn.Sequential(OrderedDict(layers))
+        self.idconv = conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st) if ni != nf else noop
+        self.merge = act_fn
+
+    def forward(self, x):
+        pooled = self.reduce(x)
+        return self.merge(self.convs(pooled) + self.idconv(pooled))
+
+# Cell
+class ResBlockTwist(nn.Module):
+    """Residual block with a strided conv branch and a pooled shortcut; the bottleneck 3x3 layer is a ConvLayerTwist."""
+    def __init__(self, expansion, ni, nh, stride=1,
+                 conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,
+                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False):
+        super().__init__()
+        nf, ni = nh * expansion, ni * expansion
+        if expansion == 1:  # basic block: two plain 3x3 layers, stride on the first
+            layers = [("conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),
+                      ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))]
+        else:  # bottleneck: 1x1 -> strided 3x3 twist -> 1x1
+            layers = [("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
+                      ("conv_1_twist", ConvLayerTwist(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),
+                      ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))]
+        if sa: layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))
+        self.convs = nn.Sequential(OrderedDict(layers))
+        self.pool = pool if stride != 1 else noop
+        self.idconv = conv_layer(ni, nf, 1, act=False) if ni != nf else noop
+        self.act_fn = act_fn
+
+    def forward(self, x):
+        return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))
diff --git a/nbs/01_layers.ipynb b/nbs/01_layers.ipynb
@@ -60,14 +60,15 @@
     "\n",
     "class ConvLayer(nn.Sequential):\n",
     "    \"\"\"Basic conv layers block\"\"\"\n",
+    "    Conv2d = nn.Conv2d\n",
     "    def __init__(self, ni, nf, ks=3, stride=1, \n",
     "            act=True,  act_fn=act_fn, \n",
     "            bn_layer=True, bn_1st=True, zero_bn=False, \n",
     "            padding=None, bias=False, groups=1, **kwargs):\n",
     "\n",
     "#         self.act = act\n",
     "        if padding==None: padding = ks//2  \n",
-    "        layers = [('conv', nn.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]\n",
+    "        layers = [('conv', self.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]\n",
     "        act_bn = [('act_fn', act_fn)] if act else []\n",
     "        if bn_layer:\n",
     "            bn = nn.BatchNorm2d(nf)\n",
@@ -1904,7 +1905,10 @@
       "Converted 01_layers.ipynb.\n",
       "Converted 02_resnet.ipynb.\n",
       "Converted 03_xresnet.ipynb.\n",
-      "Converted 80_test_layers.ipynb.\n",
+      "Converted 04_Net.ipynb.\n",
+      "Converted 05_Twist.ipynb.\n",
+      "Converted 80_test_net.ipynb.\n",
+      "Converted 81_Net.ipynb.\n",
       "Converted 81_test_xresnet.ipynb.\n",
       "Converted index.ipynb.\n"
      ]
diff --git a/nbs/05_Twist.ipynb b/nbs/05_Twist.ipynb