ayasyrev
diff --git a/model_constructor/convmixer.py b/model_constructor/convmixer.py
Lines changed: 1 addition & 1 deletion
diff --git a/model_constructor/layers.py b/model_constructor/layers.py
Lines changed: 7 additions & 7 deletions
diff --git a/model_constructor/model_constructor.py b/model_constructor/model_constructor.py
Lines changed: 13 additions & 8 deletions
diff --git a/model_constructor/net.py b/model_constructor/net.py
Lines changed: 2 additions & 2 deletions
diff --git a/model_constructor/yaresnet.py b/model_constructor/yaresnet.py
Lines changed: 6 additions & 3 deletions
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/test_Net.py b/tests/test_Net.py
Lines changed: 191 additions & 0 deletions
diff --git a/tests/test_block.py b/tests/test_block.py
Lines changed: 56 additions & 0 deletions
@@ -104,5 +104,5 @@ def __init__(self, dim: int, depth: int,
             nn.AdaptiveAvgPool2d((1, 1)),
             nn.Flatten(),
             nn.Linear(dim, n_classes))
-        if init_func is not None:
+        if init_func is not None:  # pragma: no cover
             init_func(self)
@@ -109,15 +109,15 @@ class SimpleSelfAttention(nn.Module):
     Inspired by https://arxiv.org/pdf/1805.08318.pdf  
     '''
 
-    def __init__(self, n_in: int, ks=1, sym=False):
+    def __init__(self, n_in: int, ks=1, sym=False, use_bias=False):
         super().__init__()
-        self.conv = conv1d(n_in, n_in, ks, padding=ks // 2, bias=False)
+        self.conv = conv1d(n_in, n_in, ks, padding=ks // 2, bias=use_bias)
         self.gamma = nn.Parameter(torch.tensor([0.]))
         self.sym = sym
         self.n_in = n_in
 
     def forward(self, x):
-        if self.sym:
+        if self.sym:  # TODO: check ks=3; the view(n_in, n_in) below presumably needs ks=1
             # symmetry hack by https://github.com/mgrankin
             c = self.conv.weight.view(self.n_in, self.n_in)
             c = (c + c.t()) / 2
@@ -141,7 +141,7 @@ class SEBlock(nn.Module):  # todo: deprecation worning.
 
     def __init__(self, c, r=16):
         super().__init__()
-        ch = c // r
+        ch = max(c // r, 1)
         self.squeeze = nn.AdaptiveAvgPool2d(1)
         self.excitation = nn.Sequential(
             OrderedDict([('fc_reduce', self.se_layer(c, ch, bias=self.use_bias)),
@@ -166,7 +166,7 @@ class SEBlockConv(nn.Module):  # todo: deprecation worning.
     def __init__(self, c, r=16):
         super().__init__()
 #         c_in = math.ceil(c//r/8)*8
-        c_in = c // r
+        c_in = max(c // r, 1)
         self.squeeze = nn.AdaptiveAvgPool2d(1)
         self.excitation = nn.Sequential(
             OrderedDict([
@@ -196,7 +196,7 @@ def __init__(self,
                  gate=nn.Sigmoid
                  ):
         super().__init__()
-        reducted = channels // reduction
+        reducted = max(channels // reduction, 1)  # floor at 1: channels // reduction is 0 when channels < reduction
         if rd_channels is None:
             rd_channels = reducted
         else:
@@ -232,7 +232,7 @@ def __init__(self,
                  ):
         super().__init__()
 #       rd_channels = math.ceil(channels//reduction/8)*8
-        reducted = channels // reduction
+        reducted = max(channels // reduction, 1)  # floor at 1: channels // reduction is 0 when channels < reduction
         if rd_channels is None:
             rd_channels = reducted
         else:
 
@@ -53,13 +53,18 @@ def __init__(self, expansion, in_channels, mid_channels, stride=1,
         if sa:
             layers.append(('sa', sa(out_channels)))
         self.convs = nn.Sequential(OrderedDict(layers))
-        id_layers = []
-        if stride != 1 and pool:
-            id_layers.append(("pool", pool))
-        id_layers += [] if in_channels == out_channels else [("id_conv", conv_layer(in_channels, out_channels, 1,
-                                                                                    stride=1 if pool else stride,
-                                                                                    act_fn=False))]
-        self.id_conv = None if id_layers == [] else nn.Sequential(OrderedDict(id_layers))
+        if stride != 1 or in_channels != out_channels:
+            id_layers = []
+            if stride != 1 and pool is not None:
+                id_layers.append(("pool", pool))
+            if in_channels != out_channels or (stride != 1 and pool is None):
+                id_layers += [("id_conv", conv_layer(
+                    in_channels, out_channels, 1,
+                    stride=1 if pool else stride,
+                    act_fn=False))]
+            self.id_conv = nn.Sequential(OrderedDict(id_layers))
+        else:
+            self.id_conv = None
         self.act_fn = act_fn
 
     def forward(self, x):
@@ -147,7 +152,7 @@ def __init__(self, name='MC', in_chans=3, num_classes=1000,
         if self.sa:  # if sa=1 or sa=True
             if type(self.sa) in (bool, int):
                 self.sa = SimpleSelfAttention  # default: ks=1, sym=sym
-        if self.se_module or se_reduction:
+        if self.se_module or se_reduction:  # pragma: no cover
             print("Deprecated. Pass se_module as se argument, se_reduction as arg to se.")  # add deprecation worning.
 
     @property
 
@@ -36,7 +36,7 @@ def __init__(self, expansion, ni, nh, stride=1,
             groups = int(nh / div_groups)
         if expansion == 1:
             layers = [("conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,
-                                            groups=nh if dw else groups)),
+                                            groups=ni if dw else groups)),
                       ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))
                       ]
         else:
@@ -78,7 +78,7 @@ def __init__(self, expansion, ni, nh, stride=1,
         self.reduce = noop if stride == 1 else pool
         if expansion == 1:
             layers = [("conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,
-                                            groups=nh if dw else groups)),
+                                            groups=ni if dw else groups)),
                       ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))
                       ]
         else:
 
@@ -30,8 +30,10 @@ def __init__(self, expansion, in_channels, mid_channels, stride=1,
             groups = int(mid_channels / div_groups)
         if stride != 1:
             if pool is None:
-                raise Exception("pool not passed")
-            self.reduce = pool
+                self.reduce = conv_layer(in_channels, in_channels, 1, stride=2)
+                # warnings.warn("pool not passed")  # need to warn?
+            else:
+                self.reduce = pool
         else:
             self.reduce = None
         layers = [("conv_0", conv_layer(in_channels, mid_channels, 3, stride=1,
@@ -42,7 +44,8 @@ def __init__(self, expansion, in_channels, mid_channels, stride=1,
                       ("conv_0", conv_layer(in_channels, mid_channels, 1, act_fn=act_fn, bn_1st=bn_1st)),
                       ("conv_1", conv_layer(mid_channels, mid_channels, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,
                                             groups=mid_channels if dw else groups)),
-                      ("conv_2", conv_layer(mid_channels, out_channels, 1, zero_bn=zero_bn, act_fn=False, bn_1st=bn_1st))
+                      ("conv_2", conv_layer(
+                          mid_channels, out_channels, 1, zero_bn=zero_bn, act_fn=False, bn_1st=bn_1st))
         ]
         if se:
             layers.append(('se', se(out_channels)))
 
@@ -0,0 +1,191 @@
+import torch
+import torch.nn as nn
+
+from model_constructor.net import Net, NewResBlock, ResBlock
+# from model_constructor.layers import SEModule, SimpleSelfAttention
+
+
+bs_test = 4
+
+
+params = dict(
+    block=[ResBlock, NewResBlock],
+    expansion=[1, 2],
+    groups=[1, 2],
+    dw=[0, 1],
+    div_groups=[None, 2],
+    sa=[0, 1],
+    se=[0, 1],
+    bn_1st=[True, False],
+    zero_bn=[True, False],
+    stem_bn_end=[True, False],
+    stem_stride_on=[0, 1]
+)
+
+
+def value_name(value) -> str:  # pragma: no cover
+    name = getattr(value, "__name__", None)
+    if name is not None:
+        return name
+    if isinstance(value, nn.Module):
+        return value._get_name()
+    else:
+        return value
+
+
+def ids_fn(key, value):
+    return [f"{key[:2]}_{value_name(v)}" for v in value]
+
+
+def pytest_generate_tests(metafunc):
+    for key, value in params.items():
+        if key in metafunc.fixturenames:
+            metafunc.parametrize(key, value, ids=ids_fn(key, value))
+
+
+def test_Net(
+    block, expansion,
+    groups,
+):
+    """Net: check str() and output shape for each block/expansion/groups combo."""
+    c_in = 3
+    img_size = 16
+    c_out = 8
+    name = "Test name"
+
+    mc = Net(
+        name, c_in, c_out, block,
+        expansion=expansion,
+        stem_sizes=[8, 16],
+        block_sizes=[16, 32, 64, 128],
+        groups=groups,
+        # dw=dw,
+        # div_groups=div_groups,
+        # bn_1st=bn_1st, zero_bn=zero_bn,
+        # stem_bn_end=stem_bn_end,
+    )
+    assert f"{name} constructor" in str(mc)
+    model = mc()
+    xb = torch.randn(bs_test, c_in, img_size, img_size)
+    pred = model(xb)
+    assert pred.shape == torch.Size([bs_test, c_out])
+
+
+def test_Net_SE_SA(
+    block, expansion,
+    se, sa
+):
+    """Net: check str() and output shape for each block/expansion/se/sa combo."""
+    c_in = 3
+    img_size = 16
+    c_out = 8
+    name = "Test name"
+
+    mc = Net(
+        name, c_in, c_out, block,
+        expansion=expansion,
+        stem_sizes=[8, 16],
+        block_sizes=[16, 32, 64, 128],
+        se=se, sa=sa
+    )
+    assert f"{name} constructor" in str(mc)
+    model = mc()
+    xb = torch.randn(bs_test, c_in, img_size, img_size)
+    pred = model(xb)
+    assert pred.shape == torch.Size([bs_test, c_out])
+
+
+def test_Net_div_gr(
+    block, expansion,
+    div_groups,
+):
+    """Net: check str() and output shape for each block/expansion/div_groups combo."""
+    c_in = 3
+    img_size = 16
+    c_out = 8
+    name = "Test name"
+
+    mc = Net(
+        name, c_in, c_out, block,
+        expansion=expansion,
+        stem_sizes=[8, 16],
+        block_sizes=[16, 32, 64, 128],
+        div_groups=div_groups,
+    )
+    assert f"{name} constructor" in str(mc)
+    model = mc()
+    xb = torch.randn(bs_test, c_in, img_size, img_size)
+    pred = model(xb)
+    assert pred.shape == torch.Size([bs_test, c_out])
+
+
+def test_Net_dw(
+    block, expansion,
+    dw
+):
+    """Net: check str() and output shape for each block/expansion/dw combo."""
+    c_in = 3
+    img_size = 16
+    c_out = 8
+    name = "Test name"
+
+    mc = Net(
+        name, c_in, c_out, block,
+        expansion=expansion,
+        stem_sizes=[8, 16],
+        block_sizes=[16, 32, 64, 128],
+        dw=dw
+    )
+    assert f"{name} constructor" in str(mc)
+    model = mc()
+    xb = torch.randn(bs_test, c_in, img_size, img_size)
+    pred = model(xb)
+    assert pred.shape == torch.Size([bs_test, c_out])
+
+
+def test_Net_2(
+    block, expansion,
+    bn_1st, zero_bn,
+):
+    """Net: check str() and output shape for each block/expansion/bn_1st/zero_bn combo."""
+    c_in = 3
+    img_size = 16
+    c_out = 8
+    name = "Test name"
+
+    mc = Net(
+        name, c_in, c_out, block,
+        expansion=expansion,
+        stem_sizes=[8, 16],
+        block_sizes=[16, 32, 64, 128],
+        bn_1st=bn_1st, zero_bn=zero_bn,
+    )
+    assert f"{name} constructor" in str(mc)
+    model = mc()
+    xb = torch.randn(bs_test, c_in, img_size, img_size)
+    pred = model(xb)
+    assert pred.shape == torch.Size([bs_test, c_out])
+
+
+def test_Net_stem(
+    stem_bn_end,
+    stem_stride_on
+):
+    """Net: check str() and output shape for each stem_bn_end/stem_stride_on combo."""
+    c_in = 3
+    img_size = 16
+    c_out = 8
+    name = "Test name"
+
+    mc = Net(
+        name, c_in, c_out,
+        stem_sizes=[8, 16],
+        block_sizes=[16, 32, 64, 128],
+        stem_bn_end=stem_bn_end,
+        stem_stride_on=stem_stride_on
+    )
+    assert f"{name} constructor" in str(mc)
+    model = mc()
+    xb = torch.randn(bs_test, c_in, img_size, img_size)
+    pred = model(xb)
+    assert pred.shape == torch.Size([bs_test, c_out])
@@ -0,0 +1,56 @@
+# import pytest
+import torch
+import torch.nn as nn
+from model_constructor.layers import SEModule, SimpleSelfAttention
+
+from model_constructor.model_constructor import ResBlock
+from model_constructor.yaresnet import YaResBlock
+
+bs_test = 4
+img_size = 16
+
+
+params = dict(
+    Block=[ResBlock, YaResBlock],
+    expansion=[1, 2],
+    mid_channels=[8, 16],
+    stride=[1, 2],
+    div_groups=[None, 2],
+    pool=[None, nn.AvgPool2d(2, ceil_mode=True)],
+    se=[None, SEModule],
+    sa=[None, SimpleSelfAttention],
+)
+
+
+def value_name(value) -> str:
+    name = getattr(value, "__name__", None)
+    if name is not None:
+        return name
+    if isinstance(value, nn.Module):
+        return value._get_name()
+    else:
+        return value
+
+
+def ids_fn(key, value):
+    return [f"{key[:2]}_{value_name(v)}" for v in value]
+
+
+def pytest_generate_tests(metafunc):
+    for key, value in params.items():
+        if key in metafunc.fixturenames:
+            metafunc.parametrize(key, value, ids=ids_fn(key, value))
+
+
+def test_block(Block, expansion, mid_channels, stride, div_groups, pool, se, sa):
+    """Block: check forward output shape for each parameter combination."""
+    in_channels = 8
+    out_channels = mid_channels * expansion
+    block = Block(
+        expansion, in_channels, mid_channels,
+        stride, div_groups=div_groups,
+        pool=pool, se=se, sa=sa)
+    xb = torch.randn(bs_test, in_channels * expansion, img_size, img_size)
+    y = block(xb)
+    out_size = img_size if stride == 1 else img_size // stride
+    assert y.shape == torch.Size([bs_test, out_channels, out_size, out_size])