div_groups

ayasyrev · ayasyrev · commit c077b8dac9b5 · 2021-08-26T13:22:37.000Z
diff --git a/Nbs/04_YaResNet.ipynb b/Nbs/04_YaResNet.ipynb
@@ -11,7 +11,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 1,
    "source": [
     "#hide\n",
     "# from nbdev.showdoc import *\n",
@@ -36,7 +36,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "source": [
     "#hide\n",
     "from model_constructor.yaresnet import YaResBlock"
@@ -46,7 +46,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "source": [
     "#collapse_output\n",
     "bl = YaResBlock(1,64,64,sa=True)\n",
@@ -77,14 +77,14 @@
       ]
      },
      "metadata": {},
-     "execution_count": 2
+     "execution_count": 3
     }
    ],
    "metadata": {}
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "source": [
     "#hide\n",
     "bs_test = 16\n",
@@ -106,7 +106,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "source": [
     "#collapse_output\n",
     "bl = YaResBlock(1,64,64,se=True)\n",
@@ -131,9 +131,9 @@
        "    (se): SEBlock(\n",
        "      (squeeze): AdaptiveAvgPool2d(output_size=1)\n",
        "      (excitation): Sequential(\n",
-       "        (fc_reduce): Linear(in_features=64, out_features=4, bias=False)\n",
+       "        (fc_reduce): Linear(in_features=64, out_features=4, bias=True)\n",
        "        (se_act): ReLU(inplace=True)\n",
-       "        (fc_expand): Linear(in_features=4, out_features=64, bias=False)\n",
+       "        (fc_expand): Linear(in_features=4, out_features=64, bias=True)\n",
        "        (sigmoid): Sigmoid()\n",
        "      )\n",
        "    )\n",
@@ -143,14 +143,14 @@
       ]
      },
      "metadata": {},
-     "execution_count": 7
+     "execution_count": 5
     }
    ],
    "metadata": {}
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
    "source": [
     "#hide\n",
     "bs_test = 16\n",
@@ -172,7 +172,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 7,
    "source": [
     "#collapse_output\n",
     "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n",
@@ -210,6 +210,73 @@
       ]
      },
      "metadata": {},
+     "execution_count": 7
+    }
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "source": [
+    "#hide\n",
+    "bs_test = 16\n",
+    "xb = torch.randn(bs_test, 256, 32, 32)\n",
+    "y = bl(xb)\n",
+    "print(y.shape)\n",
+    "assert y.shape == torch.Size([bs_test, 512, 16, 16]), f\"size\""
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "torch.Size([16, 512, 16, 16])\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "source": [
+    "#collapse_output\n",
+    "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, groups=4)\n",
+    "bl"
+   ],
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "YaResBlock(\n",
+       "  (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)\n",
+       "  (convs): Sequential(\n",
+       "    (conv_0): ConvLayer(\n",
+       "      (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "      (act_fn): LeakyReLU(negative_slope=0.01)\n",
+       "      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "    )\n",
+       "    (conv_1): ConvLayer(\n",
+       "      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=4, bias=False)\n",
+       "      (act_fn): LeakyReLU(negative_slope=0.01)\n",
+       "      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "    )\n",
+       "    (conv_2): ConvLayer(\n",
+       "      (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "      (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "    )\n",
+       "  )\n",
+       "  (idconv): ConvLayer(\n",
+       "    (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+       "    (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+       "  )\n",
+       "  (merge): LeakyReLU(negative_slope=0.01)\n",
+       ")"
+      ]
+     },
+     "metadata": {},
      "execution_count": 9
     }
    ],
@@ -242,7 +309,7 @@
    "execution_count": 11,
    "source": [
     "#collapse_output\n",
-    "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, groups=4)\n",
+    "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, div_groups=4)\n",
     "bl"
    ],
    "outputs": [
@@ -259,7 +326,7 @@
        "      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
        "    )\n",
        "    (conv_1): ConvLayer(\n",
-       "      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=4, bias=False)\n",
+       "      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)\n",
        "      (act_fn): LeakyReLU(negative_slope=0.01)\n",
        "      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
        "    )\n",
diff --git a/model_constructor/net.py b/model_constructor/net.py
@@ -29,11 +29,11 @@ class ResBlock(nn.Module):
     def __init__(self, expansion, ni, nh, stride=1,
                  conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,
                  pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,
-                 groups=1, dw=False):
+                 groups=1, dw=False, div_groups=None):
         super().__init__()
         nf, ni = nh * expansion, ni * expansion
-        # if groups != 1:
-        #     groups = int(nh / groups)
+        if div_groups is not None:  # check if groups != 1 and div_groups
+            groups = int(nh / div_groups)
         if expansion == 1:
             layers = [("conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,
                                             groups=nh if dw else groups)),
@@ -68,11 +68,11 @@ class NewResBlock(nn.Module):
     def __init__(self, expansion, ni, nh, stride=1,
                  conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,
                  pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,
-                 groups=1, dw=False):
+                 groups=1, dw=False, div_groups=None):
         super().__init__()
         nf, ni = nh * expansion, ni * expansion
-        # if groups != 1:
-        #     groups = int(nh / groups)
+        if div_groups is not None:  # check if groups != 1 and div_groups
+            groups = int(nh / div_groups)
         self.reduce = noop if stride == 1 else pool
         if expansion == 1:
             layers = [("conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,
@@ -114,7 +114,7 @@ def _make_layer(self, expansion, ni, nf, blocks, stride, sa):
     layers = [(f"bl_{i}", self.block(expansion, ni if i == 0 else nf, nf,
                                      stride if i == 0 else 1, sa=sa if i == blocks - 1 else False,
                                      conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool,
-                                     zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups,
+                                     zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups, div_groups=self.div_groups,
                                      dw=self.dw, se=self.se))
               for i in range(blocks)]
     return nn.Sequential(OrderedDict(layers))
@@ -144,7 +144,7 @@ def __init__(self, name='Net', c_in=3, c_out=1000,
                  norm=nn.BatchNorm2d,
                  act_fn=nn.ReLU(inplace=True),
                  pool=nn.AvgPool2d(2, ceil_mode=True),
-                 expansion=1, groups=1, dw=False,
+                 expansion=1, groups=1, dw=False, div_groups=None,
                  sa=False, se=False, se_reduction=16,
                  bn_1st=True,
                  zero_bn=True,
@@ -195,7 +195,7 @@ def __call__(self):
     def __repr__(self):
         return (f"{self.name} constructor\n"
                 f"  c_in: {self.c_in}, c_out: {self.c_out}\n"
-                f"  expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\n"
+                f"  expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\n, div_groups: {self.div_groups}"
                 f"  sa: {self.sa}, se: {self.se}\n"
                 f"  stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\n"
                 f"  body sizes {self._block_sizes}\n"
diff --git a/model_constructor/yaresnet.py b/model_constructor/yaresnet.py
@@ -19,11 +19,11 @@ class YaResBlock(nn.Module):
     def __init__(self, expansion, ni, nh, stride=1,
                  conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,
                  pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False,
-                 groups=1, dw=False):
+                 groups=1, dw=False, div_groups=None):
         super().__init__()
         nf, ni = nh * expansion, ni * expansion
-        # if groups != 1:
-        #     groups = int(nh / groups)
+        if div_groups is not None:  # check if groups != 1 and div_groups
+            groups = int(nh / div_groups)
         self.reduce = noop if stride == 1 else pool
         layers = [("conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,
                                         groups=nh if dw else groups)),