Commit 851426f

fix se, test layers
1 parent: c0246df

3 files changed (+125, -13 lines)

model_constructor/layers.py

Lines changed: 2 additions & 2 deletions
@@ -117,7 +117,7 @@ def __init__(self, n_in: int, ks=1, sym=False):
         self.n_in = n_in
 
     def forward(self, x):
-        if self.sym:
+        if self.sym:  # check ks=3
             # symmetry hack by https://github.com/mgrankin
             c = self.conv.weight.view(self.n_in, self.n_in)
             c = (c + c.t()) / 2
@@ -232,7 +232,7 @@ def __init__(self,
                  ):
         super().__init__()
         # rd_channels = math.ceil(channels//reduction/8)*8
-        reducted = channels // reduction
+        reducted = max(channels // reduction, 1)  # preserve zero-element tensors
         if rd_channels is None:
             rd_channels = reducted
         else:
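
Why the clamp matters: with the values exercised by the new tests below (in_channels=8, reduction=16), plain integer division yields 0 and the SE bottleneck collapses to zero channels; max(..., 1) keeps the reduced width at least 1. A minimal sketch of the arithmetic (illustration only, not part of the commit):

channels, reduction = 8, 16              # values exercised by tests/test_layers.py
old_rd = channels // reduction           # 0 -> zero-width squeeze layer
new_rd = max(channels // reduction, 1)   # 1 -> smallest valid bottleneck
print(old_rd, new_rd)                    # 0 1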

tests/test_block.py

Lines changed: 15 additions & 11 deletions
@@ -10,15 +10,16 @@
 img_size = 16
 
 
-params = {
-    "Block": [ResBlock, YaResBlock],
-    "expansion": [1, 2],
-    "mid_channels": [8, 16],
-    "stride": [1, 2],
-    "pool": [None, nn.AvgPool2d(2, ceil_mode=True)],
-    "se": [None, SEModule],
-    "sa": [None, SimpleSelfAttention],
-}
+params = dict(
+    Block=[ResBlock, YaResBlock],
+    expansion=[1, 2],
+    mid_channels=[8, 16],
+    stride=[1, 2],
+    div_groups=[None, 2],
+    pool=[None, nn.AvgPool2d(2, ceil_mode=True)],
+    se=[None, SEModule],
+    sa=[None, SimpleSelfAttention],
+)
 
 
 def value_name(value) -> str:
@@ -41,11 +42,14 @@ def pytest_generate_tests(metafunc):
             metafunc.parametrize(key, value, ids=ids_fn(key, value))
 
 
-def test_block(Block, expansion, mid_channels, stride, pool, se, sa):
+def test_block(Block, expansion, mid_channels, stride, div_groups, pool, se, sa):
     """test block"""
     in_channels = 8
     out_channels = mid_channels * expansion
-    block = Block(expansion, in_channels, mid_channels, stride, pool=pool, se=se, sa=sa)
+    block = Block(
+        expansion, in_channels, mid_channels,
+        stride, div_groups=div_groups,
+        pool=pool, se=se, sa=sa)
     xb = torch.randn(bs_test, in_channels * expansion, img_size, img_size)
     y = block(xb)
     out_size = img_size if stride == 1 else img_size // stride
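
Each key in params becomes a separate pytest parametrization axis via pytest_generate_tests, so the block tests run over the cartesian product of all value lists, and the new div_groups key doubles that grid. A rough sketch of the resulting grid size (illustration only; strings stand in for the real classes and modules):

from itertools import product

params = dict(
    Block=["ResBlock", "YaResBlock"],
    expansion=[1, 2],
    mid_channels=[8, 16],
    stride=[1, 2],
    div_groups=[None, 2],
    pool=[None, "AvgPool2d(2)"],
    se=[None, "SEModule"],
    sa=[None, "SimpleSelfAttention"],
)

n_cases = len(list(product(*params.values())))
print(n_cases)  # 256 generated test cases (128 before div_groups was added)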

tests/test_layers.py

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
+import torch
+import torch.nn as nn
+
+from model_constructor.layers import ConvBnAct, Flatten, Noop, SEModule, SEModuleConv, SimpleSelfAttention, noop
+
+
+bs_test = 4
+
+
+params = dict(
+    kernel_size=[3, 1],
+    stride=[1, 2],
+    padding=[None, 1],
+    bias=[False, True],
+    groups=[1, 2],
+    # # act_fn=act_fn,
+    pre_act=[False, True],
+    bn_layer=[True, False],
+    bn_1st=[True, False],
+    zero_bn=[False, True],
+    # SA
+    sym=[False, True],
+    # SE
+    se_module=[SEModule, SEModuleConv],
+    reduction=[16, 2],
+    rd_channels=[None, 2],
+    rd_max=[False, True],
+    use_bias=[True, False],
+)
+
+
+def value_name(value) -> str:
+    name = getattr(value, "__name__", None)
+    if name is not None:
+        return name
+    if isinstance(value, nn.Module):
+        return value._get_name()
+    else:
+        return value
+
+
+def ids_fn(key, value):
+    return [f"{key[:2]}_{value_name(v)}" for v in value]
+
+
+def pytest_generate_tests(metafunc):
+    for key, value in params.items():
+        if key in metafunc.fixturenames:
+            metafunc.parametrize(key, value, ids=ids_fn(key, value))
+
+
+def test_Flatten():
+    """test Flatten"""
+    flatten = Flatten()
+    channels = 4
+    xb = torch.randn(bs_test, channels, channels)
+    out = flatten(xb)
+    assert out.shape == torch.Size([bs_test, channels * channels])
+
+
+def test_noop():
+    """test Noop, noop"""
+    xb = torch.randn(bs_test)
+    xb_copy = xb.clone().detach()
+    out = noop(xb)
+    assert out is xb
+    assert all(out.eq(xb_copy))
+    noop_module = Noop()
+    out = noop_module(xb)
+    assert out is xb
+    assert all(out.eq(xb_copy))
+
+
+def test_ConvBnAct(kernel_size, stride, bias, groups, pre_act, bn_layer, bn_1st, zero_bn):
+    """test ConvBnAct"""
+    in_channels = out_channels = 4
+    channel_size = 4
+    block = ConvBnAct(
+        in_channels, out_channels, kernel_size, stride,
+        padding=None, bias=bias, groups=groups,
+        pre_act=pre_act, bn_layer=bn_layer, bn_1st=bn_1st, zero_bn=zero_bn)
+    xb = torch.randn(bs_test, in_channels, channel_size, channel_size)
+    out = block(xb)
+    out_size = channel_size
+    if stride == 2:
+        out_size = channel_size // stride
+    assert out.shape == torch.Size([bs_test, out_channels, out_size, out_size])
+
+
+def test_SimpleSelfAttention(sym):
+    """test SimpleSelfAttention"""
+    in_channels = 4
+    kernel_size = 1  # ? can be 3? if so check sym hack.
+    channel_size = 4
+    sa = SimpleSelfAttention(in_channels, kernel_size, sym)
+    xb = torch.randn(bs_test, in_channels, channel_size, channel_size)
+    out = sa(xb)
+    assert out.shape == torch.Size([bs_test, in_channels, channel_size, channel_size])
+
+
+def test_SE(se_module, reduction, rd_channels, rd_max, use_bias):
+    """test SE"""
+    in_channels = 8
+    channel_size = 4
+    se = se_module(in_channels, reduction, rd_channels, rd_max, use_bias=use_bias)
+    xb = torch.randn(bs_test, in_channels, channel_size, channel_size)
+    out = se(xb)
+    assert out.shape == torch.Size([bs_test, in_channels, channel_size, channel_size])
