Commit 8f9d6ba

Merge pull request #72 from ayasyrev/mc_root_validate
Mc root validate
2 parents 13404b9 + ea44d85 commit 8f9d6ba

4 files changed: +46 additions, -46 deletions

src/model_constructor/layers.py

Lines changed: 4 additions & 4 deletions
@@ -1,5 +1,5 @@
 from collections import OrderedDict
-from typing import List, Optional, Union
+from typing import List, Optional, Type, Union
 
 import torch
 import torch.nn as nn
@@ -61,7 +61,7 @@ def __init__(
         padding: Optional[int] = None,
         bias: bool = False,
         groups: int = 1,
-        act_fn: Union[nn.Module, bool] = act_fn,
+        act_fn: Union[Type[nn.Module], bool] = nn.ReLU,
         pre_act: bool = False,
         bn_layer: bool = True,
         bn_1st: bool = True,
@@ -88,14 +88,14 @@ def __init__(
             bn = self.batchnorm_module(out_channels)
             nn.init.constant_(bn.weight, 0.0 if zero_bn else 1.0)
             layers.append(("bn", bn))
-        if isinstance(act_fn, nn.Module):  # act_fn either nn.Module or False
+        if act_fn:  # act_fn either nn.Module subclass or False
             if pre_act:
                 act_position = 0
             elif not bn_1st:
                 act_position = 1
             else:
                 act_position = len(layers)
-            layers.insert(act_position, ("act_fn", act_fn))
+            layers.insert(act_position, ("act_fn", act_fn(inplace=True)))  # type: ignore
         super().__init__(OrderedDict(layers))
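
With this change ConvBnAct takes the activation as a class (an nn.Module subclass) or False, and builds the instance itself via act_fn(inplace=True). A minimal usage sketch, not part of the commit; the leading positional arguments (in_channels, out_channels) are assumed from the rest of the class, which is not shown in these hunks:

import torch
import torch.nn as nn

from model_constructor.layers import ConvBnAct

# Pass the activation class, not an instance; the layer instantiates it itself.
conv = ConvBnAct(3, 32, act_fn=nn.Mish)
# Pass False to skip the activation entirely.
plain = ConvBnAct(3, 32, act_fn=False)

out = conv(torch.randn(1, 3, 16, 16))
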
src/model_constructor/model_constructor.py

Lines changed: 34 additions & 35 deletions
@@ -1,24 +1,21 @@
 from collections import OrderedDict
-from typing import Callable, List, Optional, Type, Union
+from functools import partial
+from typing import Any, Callable, List, Optional, Type, Union
 
 import torch.nn as nn
-from pydantic import BaseModel
+from pydantic import BaseModel, root_validator
 
 from .layers import ConvBnAct, SEModule, SimpleSelfAttention
 
 __all__ = [
     "init_cnn",
-    "act_fn",
     "ResBlock",
     "ModelConstructor",
     "XResNet34",
     "XResNet50",
 ]
 
 
-act_fn = nn.ReLU(inplace=True)
-
-
 class ResBlock(nn.Module):
     """Universal Resnet block. Basic block if expansion is 1, otherwise is Bottleneck."""
 
@@ -29,13 +26,13 @@ def __init__(
         mid_channels: int,
         stride: int = 1,
         conv_layer=ConvBnAct,
-        act_fn: nn.Module = act_fn,
+        act_fn: Type[nn.Module] = nn.ReLU,
         zero_bn: bool = True,
         bn_1st: bool = True,
         groups: int = 1,
         dw: bool = False,
         div_groups: Union[None, int] = None,
-        pool: Union[nn.Module, None] = None,
+        pool: Union[Callable[[Any], nn.Module], None] = None,
         se: Union[nn.Module, None] = None,
         sa: Union[nn.Module, None] = None,
     ):
@@ -100,7 +97,7 @@ def __init__(
         if stride != 1 or in_channels != out_channels:
             id_layers = []
             if stride != 1 and pool is not None:  # if pool - reduce by pool else stride 2 art id_conv
-                id_layers.append(("pool", pool))
+                id_layers.append(("pool", pool()))
             if in_channels != out_channels or (stride != 1 and pool is None):
                 id_layers += [("id_conv", conv_layer(
                     in_channels,
@@ -112,7 +109,7 @@ def __init__(
             self.id_conv = nn.Sequential(OrderedDict(id_layers))
         else:
             self.id_conv = None
-        self.act_fn = act_fn
+        self.act_fn = act_fn(inplace=True)  # type: ignore
 
     def forward(self, x):
         identity = self.id_conv(x) if self.id_conv is not None else x
@@ -130,8 +127,8 @@ class ModelCfg(BaseModel):
     block_sizes: List[int] = [64, 128, 256, 512]
     layers: List[int] = [2, 2, 2, 2]
     norm: Type[nn.Module] = nn.BatchNorm2d
-    act_fn: nn.Module = nn.ReLU(inplace=True)
-    pool: nn.Module = nn.AvgPool2d(2, ceil_mode=True)
+    act_fn: Type[nn.Module] = nn.ReLU
+    pool: Callable[[Any], nn.Module] = partial(nn.AvgPool2d, kernel_size=2, ceil_mode=True)
     expansion: int = 1
     groups: int = 1
     dw: bool = False
@@ -144,7 +141,7 @@ class ModelCfg(BaseModel):
     zero_bn: bool = True
     stem_stride_on: int = 0
     stem_sizes: List[int] = [32, 32, 64]
-    stem_pool: Union[nn.Module, None] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # type: ignore
+    stem_pool: Union[Callable[[Any], nn.Module], None] = partial(nn.MaxPool2d, kernel_size=3, stride=2, padding=1)
     stem_bn_end: bool = False
     init_cnn: Optional[Callable[[nn.Module], None]] = None
     make_stem: Optional[Callable] = None
@@ -192,7 +189,7 @@ def make_stem(self: ModelCfg) -> nn.Sequential:
         for i in range(len(self.stem_sizes) - 1)
     ]
     if self.stem_pool:
-        stem.append(("stem_pool", self.stem_pool))
+        stem.append(("stem_pool", self.stem_pool()))
     if self.stem_bn_end:
         stem.append(("norm", self.norm(self.stem_sizes[-1])))  # type: ignore
     return nn.Sequential(OrderedDict(stem))
@@ -260,29 +257,30 @@ def make_head(cfg: ModelCfg) -> nn.Sequential:
 class ModelConstructor(ModelCfg):
     """Model constructor. As default - xresnet18"""
 
-    def __init__(self, **data):
-        super().__init__(**data)
-        if self.init_cnn is None:
-            self.init_cnn = init_cnn
-        if self.make_stem is None:
-            self.make_stem = make_stem
-        if self.make_layer is None:
-            self.make_layer = make_layer
-        if self.make_body is None:
-            self.make_body = make_body
-        if self.make_head is None:
-            self.make_head = make_head
-
-        if self.stem_sizes[0] != self.in_chans:
-            self.stem_sizes = [self.in_chans] + self.stem_sizes
-        if self.se and isinstance(self.se, (bool, int)):  # if se=1 or se=True
-            self.se = SEModule
-        if self.sa and isinstance(self.sa, (bool, int)):  # if sa=1 or sa=True
-            self.sa = SimpleSelfAttention  # default: ks=1, sym=sym
-        if self.se_module or self.se_reduction:  # pragma: no cover
+    @root_validator
+    def post_init(cls, values):
+        if values["init_cnn"] is None:
+            values["init_cnn"] = init_cnn
+        if values["make_stem"] is None:
+            values["make_stem"] = make_stem
+        if values["make_layer"] is None:
+            values["make_layer"] = make_layer
+        if values["make_body"] is None:
+            values["make_body"] = make_body
+        if values["make_head"] is None:
+            values["make_head"] = make_head
+
+        if values["stem_sizes"][0] != values["in_chans"]:
+            values["stem_sizes"] = [values["in_chans"]] + values["stem_sizes"]
+        if values["se"] and isinstance(values["se"], (bool, int)):  # if se=1 or se=True
+            values["se"] = SEModule
+        if values["sa"] and isinstance(values["sa"], (bool, int)):  # if sa=1 or sa=True
+            values["sa"] = SimpleSelfAttention  # default: ks=1, sym=sym
+        if values["se_module"] or values["se_reduction"]:  # pragma: no cover
             print(
                 "Deprecated. Pass se_module as se argument, se_reduction as arg to se."
             )  # add deprecation warning.
+        return values
 
     @property
     def stem(self):
@@ -309,11 +307,12 @@ def __call__(self):
         return model
 
     def __repr__(self):
+        se_repr = self.se.__name__ if self.se else "False"
         return (
             f"{self.name} constructor\n"
             f" in_chans: {self.in_chans}, num_classes: {self.num_classes}\n"
             f" expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}, div_groups: {self.div_groups}\n"
-            f" sa: {self.sa}, se: {self.se}\n"
+            f" act_fn: {self.act_fn.__name__}, sa: {self.sa}, se: {se_repr}\n"
             f" stem sizes: {self.stem_sizes}, stride on {self.stem_stride_on}\n"
             f" body sizes {self.block_sizes}\n"
             f" layers: {self.layers}"

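Taken together, the config now carries activation classes and pool factories, and the root_validator fills in the builder callbacks that were previously set in __init__. A minimal sketch of constructing a model with the new-style arguments, not part of the commit; the keyword names follow the ModelCfg fields shown above and the import path is the module in this diff:

from functools import partial

import torch.nn as nn

from model_constructor.model_constructor import ModelConstructor

# act_fn is a class and pool a zero-argument factory returning a module;
# make_stem/make_layer/make_body/make_head default via the root_validator when left as None.
mc = ModelConstructor(
    act_fn=nn.Mish,
    pool=partial(nn.AvgPool2d, kernel_size=2, ceil_mode=True),
    layers=[2, 2, 2, 2],
)
model = mc()  # __call__ assembles and returns the model
print(mc)     # __repr__ now reports the activation by class name, e.g. "act_fn: Mish"
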
src/model_constructor/yaresnet.py

Lines changed: 6 additions & 6 deletions
@@ -2,7 +2,7 @@
 # Yet another ResNet.
 
 from collections import OrderedDict
-from typing import List, Type, Union
+from typing import Any, Callable, List, Type, Union
 
 import torch.nn as nn
 from torch.nn import Mish
@@ -15,7 +15,7 @@
 ]
 
 
-act_fn = nn.ReLU(inplace=True)
+# act_fn = nn.ReLU(inplace=True)
 
 
 class YaResBlock(nn.Module):
@@ -28,13 +28,13 @@ def __init__(
         mid_channels: int,
         stride: int = 1,
         conv_layer=ConvBnAct,
-        act_fn: nn.Module = act_fn,
+        act_fn: Type[nn.Module] = nn.ReLU,
         zero_bn: bool = True,
         bn_1st: bool = True,
         groups: int = 1,
         dw: bool = False,
         div_groups: Union[None, int] = None,
-        pool: Union[nn.Module, None] = None,
+        pool: Union[Callable[[Any], nn.Module], None] = None,
         se: Union[nn.Module, None] = None,
         sa: Union[nn.Module, None] = None,
     ):
@@ -49,7 +49,7 @@ def __init__(
                 self.reduce = conv_layer(in_channels, in_channels, 1, stride=2)
                 # warnings.warn("pool not passed")  # need to warn?
             else:
-                self.reduce = pool
+                self.reduce = pool()
         else:
             self.reduce = None
         if expansion == 1:
@@ -115,7 +115,7 @@ def __init__(
             )
         else:
             self.id_conv = None
-        self.merge = act_fn
+        self.merge = act_fn()
 
     def forward(self, x):
         if self.reduce:
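
As in ResBlock, the block now instantiates both the pool and the merge activation itself. A minimal sketch, not part of the commit, assuming YaResBlock shares ResBlock's leading positional arguments (expansion, in_channels, mid_channels), which are not visible in these hunks:

from functools import partial

import torch
import torch.nn as nn

from model_constructor.yaresnet import YaResBlock

block = YaResBlock(
    1, 64, 64,   # assumed order: expansion, in_channels, mid_channels
    stride=2,
    act_fn=nn.Mish,                                              # class, instantiated as act_fn()
    pool=partial(nn.AvgPool2d, kernel_size=2, ceil_mode=True),   # factory, called as pool()
)
out = block(torch.randn(8, 64, 32, 32))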

tests/test_block.py

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,5 @@
 # import pytest
+from functools import partial
 import torch
 import torch.nn as nn
 from model_constructor.layers import SEModule, SimpleSelfAttention
@@ -16,7 +17,7 @@
     mid_channels=[8, 16],
     stride=[1, 2],
     div_groups=[None, 2],
-    pool=[None, nn.AvgPool2d(2, ceil_mode=True)],
+    pool=[None, partial(nn.AvgPool2d, kernel_size=2, ceil_mode=True)],
     se=[None, SEModule],
    sa=[None, SimpleSelfAttention],
 )
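
For reference, a small sketch (not part of the test file) of what the parametrized partial expands to: the block now calls pool() itself, so every test case gets a fresh pooling module instead of sharing one instance:

from functools import partial

import torch.nn as nn

pool = partial(nn.AvgPool2d, kernel_size=2, ceil_mode=True)
layer = pool()   # equivalent to nn.AvgPool2d(kernel_size=2, ceil_mode=True)
assert isinstance(layer, nn.AvgPool2d)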
