@@ -22,7 +22,7 @@
 
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from timm.layers import PatchEmbed, Mlp, DropPath, to_2tuple, trunc_normal_, _assert, ClassifierHead,\
-    resample_patch_embed, ndgrid
+    resample_patch_embed, ndgrid, get_act_layer, LayerType
 from ._builder import build_model_with_cfg
 from ._features_fx import register_notrace_function
 from ._registry import generate_default_cfgs, register_model, register_model_deprecations
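The two new imports carry the whole change: get_act_layer resolves a string name to an activation class (callables pass through unchanged), and LayerType is timm's type alias for the accepted union. A minimal sketch of that behavior, assuming timm's default activation registry:

import torch.nn as nn
from timm.layers import get_act_layer

# String names are looked up in timm's registry; callables and classes
# pass through unchanged, so call sites that pass nn.GELU keep working.
assert get_act_layer('gelu') is nn.GELU
assert get_act_layer(nn.SiLU) is nn.SiLU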
@@ -206,7 +206,7 @@ def __init__(
             proj_drop: float = 0.,
             attn_drop: float = 0.,
             drop_path: float = 0.,
-            act_layer: nn.Module = nn.GELU,
+            act_layer: LayerType = "gelu",
             norm_layer: nn.Module = nn.LayerNorm,
             pretrained_window_size: _int_or_tuple_2_t = 0,
     ) -> None:
@@ -235,6 +235,7 @@ def __init__(
         self.shift_size: Tuple[int, int] = ss
         self.window_area = self.window_size[0] * self.window_size[1]
         self.mlp_ratio = mlp_ratio
+        act_layer = get_act_layer(act_layer)
 
         self.attn = WindowAttention(
             dim,
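Resolving once in __init__ means everything downstream in the block (its Mlp) can call act_layer() uniformly, however the caller spelled it. A small sketch of the equivalence using timm's Mlp directly (the feature sizes here are illustrative):

import torch.nn as nn
from timm.layers import Mlp, get_act_layer

# Both spellings of the activation yield the same module after resolution.
for spec in ('gelu', nn.GELU):
    mlp = Mlp(in_features=96, hidden_features=384, act_layer=get_act_layer(spec))
    print(type(mlp.act))  # <class 'torch.nn.modules.activation.GELU'> both times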
@@ -372,6 +373,7 @@ def __init__(
             proj_drop: float = 0.,
             attn_drop: float = 0.,
             drop_path: float = 0.,
+            act_layer: Union[str, Callable] = 'gelu',
             norm_layer: nn.Module = nn.LayerNorm,
             pretrained_window_size: _int_or_tuple_2_t = 0,
             output_nchw: bool = False,
@@ -390,6 +392,7 @@ def __init__(
             proj_drop: Projection dropout rate.
             attn_drop: Attention dropout rate.
             drop_path: Stochastic depth rate.
+            act_layer: Activation layer type.
             norm_layer: Normalization layer.
             pretrained_window_size: Local window size in pretraining.
             output_nchw: Output tensors in NCHW format instead of NHWC.
@@ -424,6 +427,7 @@ def __init__(
                 proj_drop=proj_drop,
                 attn_drop=attn_drop,
                 drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
+                act_layer=act_layer,
                 norm_layer=norm_layer,
                 pretrained_window_size=pretrained_window_size,
             )
@@ -471,6 +475,7 @@ def __init__(
             proj_drop_rate: float = 0.,
             attn_drop_rate: float = 0.,
             drop_path_rate: float = 0.1,
+            act_layer: Union[str, Callable] = 'gelu',
             norm_layer: Callable = nn.LayerNorm,
             pretrained_window_sizes: Tuple[int, ...] = (0, 0, 0, 0),
             **kwargs,
@@ -492,6 +497,7 @@ def __init__(
             attn_drop_rate: Attention dropout rate.
             drop_path_rate: Stochastic depth rate.
             norm_layer: Normalization layer.
+            act_layer: Activation layer type.
             patch_norm: If True, add normalization after patch embedding.
             pretrained_window_sizes: Pretrained window sizes of each layer.
             output_fmt: Output tensor format if not None, otherwise output 'NHWC' by default.
@@ -541,6 +547,7 @@ def __init__(
                 proj_drop=proj_drop_rate,
                 attn_drop=attn_drop_rate,
                 drop_path=dpr[i],
+                act_layer=act_layer,
                 norm_layer=norm_layer,
                 pretrained_window_size=pretrained_window_sizes[i],
             )]
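End to end, the new argument is threaded block -> stage -> model, so a SwinV2 variant can now be built with a different activation by name. A hedged usage sketch (swinv2_tiny_window8_256 is one of the configs registered in this file; pretrained weights are left off since the released checkpoints were trained with GELU, and num_classes is illustrative):

import torch
import timm

# 'silu' resolves via get_act_layer inside each block.
model = timm.create_model('swinv2_tiny_window8_256', act_layer='silu', num_classes=10)
x = torch.randn(1, 3, 256, 256)
print(model(x).shape)  # torch.Size([1, 10])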