
Commit 1f4498f

Add ResNet deep tiered stem and model weights for seresnext26t_32x4d and seresnext26d_32x4d
1 parent 73b7845 commit 1f4498f

File tree

3 files changed: +96 -30 lines

README.md

Lines changed: 20 additions & 0 deletions
@@ -2,6 +2,15 @@

 ## What's New

+### Dec 28, 2019
+* Add new model weights and training hparams (see Training Hyperparameters section)
+  * `seresnext26d_32x4d` - 77.6 top-1, 93.6 top-5
+    * deep stem (32, 32, 64), avgpool downsample
+    * stem/downsample from bag-of-tricks paper
+  * `seresnext26t_32x4d` - 78.0 top-1, 93.7 top-5
+    * deep tiered stem (24, 48, 64), avgpool downsample (a modified 'D' variant)
+    * stem sizing mods from Jeremy Howard and fastai devs discussing ResNet architecture experiments
+
 ### Dec 23, 2019
 * Add RandAugment trained MixNet-XL weights with 80.48 top-1.
 * `--dist-bn` argument added to train.py, will distribute BN stats between nodes after each train epoch, before eval
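The two models called out in the Dec 28 notes are registered later in this commit (see timm/models/resnet.py below), so once this revision of the repo is installed they can be built through the usual factory path. A minimal sketch, assuming `timm` and `torch` are importable:

```python
import torch
import timm  # this repo, at this commit or later

# 'seresnext26t_32x4d' / 'seresnext26d_32x4d' are the names registered in resnet.py below
model = timm.create_model('seresnext26t_32x4d', pretrained=False)
model.eval()

with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 1000])
```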
@@ -114,6 +123,8 @@ I've leveraged the training scripts in this repository to train a few of the mod
 | efficientnet_b1 | 78.692 (21.308) | 94.086 (5.914) | 7.79M | bicubic | 240 |
 | resnext50_32x4d | 78.512 (21.488) | 94.042 (5.958) | 25M | bicubic | 224 |
 | resnet50 | 78.470 (21.530) | 94.266 (5.734) | 25.6M | bicubic | 224 |
+| seresnext26t_32x4d | 77.998 (22.002) | 93.708 (6.292) | 16.8M | bicubic | 224 |
+| seresnext26d_32x4d | 77.602 (22.398) | 93.608 (6.392) | 16.8M | bicubic | 224 |
 | mixnet_m | 77.256 (22.744) | 93.418 (6.582) | 5.01M | bicubic | 224 |
 | seresnext26_32x4d | 77.104 (22.896) | 93.316 (6.684) | 16.8M | bicubic | 224 |
 | efficientnet_b0 | 76.912 (23.088) | 93.210 (6.790) | 5.29M | bicubic | 224 |
@@ -237,11 +248,20 @@ Sources for original weights:
 ## Training Hyperparameters

 ### EfficientNet-B2 with RandAugment - 80.4 top-1, 95.1 top-5
+These params are for dual Titan RTX cards with NVIDIA Apex installed:
+
 `./distributed_train.sh 2 /imagenet/ --model efficientnet_b2 -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.3 --drop-connect 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .016`

 ### MixNet-XL with RandAugment - 80.5 top-1, 94.9 top-5
+These params are for dual Titan RTX cards with NVIDIA Apex installed:
+
 `./distributed_train.sh 2 /imagenet/ --model mixnet_xl -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .969 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.3 --drop-connect 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.3 --amp --lr .016 --dist-bn reduce`

+### SE-ResNeXt-26-D and SE-ResNeXt-26-T
+These hparams (or similar) work well for a wide range of ResNet architectures. It's generally a good idea to increase the epoch count as the model size increases, i.e. approx. 180-200 for ResNe(X)t50 and 220+ for larger models. Increase the batch size and LR proportionally for better GPUs or with AMP enabled. These params were for 2 1080Ti cards:
+
+`./distributed_train.sh 2 /imagenet/ --model seresnext26t_32x4d --lr 0.1 --warmup-epochs 5 --epochs 160 --weight-decay 1e-4 --sched cosine --reprob 0.4 --remode pixel -b 112`
+
 **TODO dig up some more**
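As a purely hypothetical application of the scaling advice above (not a tested recipe from this commit), a ResNe(X)t50-class run might reuse the same flags with the epoch count raised into the suggested 180-200 range:

`./distributed_train.sh 2 /imagenet/ --model resnext50_32x4d --lr 0.1 --warmup-epochs 5 --epochs 200 --weight-decay 1e-4 --sched cosine --reprob 0.4 --remode pixel -b 112`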

timm/models/gluon_resnet.py

Lines changed: 13 additions & 13 deletions
@@ -121,7 +121,7 @@ def gluon_resnet50_v1c(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet50_v1c']
     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, **kwargs)
+                   stem_width=32, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -134,7 +134,7 @@ def gluon_resnet101_v1c(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet101_v1c']
     model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, **kwargs)
+                   stem_width=32, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -147,7 +147,7 @@ def gluon_resnet152_v1c(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet152_v1c']
     model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, **kwargs)
+                   stem_width=32, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -160,7 +160,7 @@ def gluon_resnet50_v1d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet50_v1d']
     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=32, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -173,7 +173,7 @@ def gluon_resnet101_v1d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet101_v1d']
     model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=32, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -186,7 +186,7 @@ def gluon_resnet152_v1d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet152_v1d']
     model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=32, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -199,7 +199,7 @@ def gluon_resnet50_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet50_v1e']
     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=64, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     #if pretrained:
     #    load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -212,7 +212,7 @@ def gluon_resnet101_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet101_v1e']
     model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=64, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -225,7 +225,7 @@ def gluon_resnet152_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet152_v1e']
     model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=64, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -238,7 +238,7 @@ def gluon_resnet50_v1s(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet50_v1s']
     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, **kwargs)
+                   stem_width=64, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -251,7 +251,7 @@ def gluon_resnet101_v1s(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
    default_cfg = default_cfgs['gluon_resnet101_v1s']
     model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, **kwargs)
+                   stem_width=64, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -264,7 +264,7 @@ def gluon_resnet152_v1s(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['gluon_resnet152_v1s']
     model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, **kwargs)
+                   stem_width=64, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -362,7 +362,7 @@ def gluon_senet154(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     default_cfg = default_cfgs['gluon_senet154']
     model = ResNet(
         Bottleneck, [3, 8, 36, 3], cardinality=64, base_width=4, use_se=True,
-        deep_stem=True, down_kernel_size=3, block_reduce_first=2,
+        stem_type='deep', down_kernel_size=3, block_reduce_first=2,
         num_classes=num_classes, in_chans=in_chans, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
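Every hunk in this file is the same mechanical rename, so any downstream code that constructs ResNet directly needs the equivalent one-line change. A sketch of the migration for a hypothetical external caller:

```python
from timm.models.resnet import ResNet, Bottleneck

# Before this commit:
#   model = ResNet(Bottleneck, [3, 4, 6, 3], stem_width=32, deep_stem=True, avg_down=True)
# After: the boolean deep_stem flag is replaced by the stem_type string.
model = ResNet(Bottleneck, [3, 4, 6, 3], stem_width=32, stem_type='deep', avg_down=True)
```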

timm/models/resnet.py

Lines changed: 63 additions & 17 deletions
@@ -91,6 +91,12 @@ def _cfg(url='', **kwargs):
         url='https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x8-b4712904.pth'),
     'swsl_resnext101_32x16d': _cfg(
         url='https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x16-f3559a9c.pth'),
+    'seresnext26d_32x4d': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/seresnext26d_32x4d-80fa48a3.pth',
+        interpolation='bicubic'),
+    'seresnext26t_32x4d': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/seresnext26t_32x4d-361bc1c4.pth',
+        interpolation='bicubic'),
 }
@@ -231,10 +237,11 @@ class ResNet(nn.Module):

     ResNet variants:
       * normal, b - 7x7 stem, stem_width = 64, same as torchvision ResNet, NVIDIA ResNet 'v1.5', Gluon v1b
-      * c - 3 layer deep 3x3 stem, stem_width = 32
-      * d - 3 layer deep 3x3 stem, stem_width = 32, average pool in downsample
-      * e - 3 layer deep 3x3 stem, stem_width = 64, average pool in downsample
-      * s - 3 layer deep 3x3 stem, stem_width = 64
+      * c - 3 layer deep 3x3 stem, stem_width = 32 (32, 32, 64)
+      * d - 3 layer deep 3x3 stem, stem_width = 32 (32, 32, 64), average pool in downsample
+      * e - 3 layer deep 3x3 stem, stem_width = 64 (64, 64, 128), average pool in downsample
+      * s - 3 layer deep 3x3 stem, stem_width = 64 (64, 64, 128)
+      * t - 3 layer deep 3x3 stem, stem_width = 32 (24, 48, 64), average pool in downsample

     ResNeXt
       * normal - 7x7 stem, stem_width = 64, standard cardinality and base widths
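The variant letters map onto constructor arguments used throughout this file; for instance, grounded in the model registrations further down in this diff:

```python
# 'd' variant: deep stem + avg-pool downsample (see resnet26d / resnet50d below)
resnet_d_kwargs = dict(stem_width=32, stem_type='deep', avg_down=True)

# 't' variant: tiered deep stem + avg-pool downsample (see seresnext26t_32x4d below)
resnet_t_kwargs = dict(stem_width=32, stem_type='deep_tiered', avg_down=True)
```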
@@ -263,10 +270,13 @@ class ResNet(nn.Module):
         Number of convolution groups for 3x3 conv in Bottleneck.
     base_width : int, default 64
         Factor determining bottleneck channels. `planes * base_width / 64 * cardinality`
-    deep_stem : bool, default False
-        Whether to replace the 7x7 conv1 with 3 3x3 convolution layers.
     stem_width : int, default 64
         Number of channels in stem convolutions
+    stem_type : str, default ''
+        The type of stem:
+          * '', default - a single 7x7 conv with a width of stem_width
+          * 'deep' - three 3x3 convolution layers of widths stem_width, stem_width, stem_width * 2
+          * 'deep_tiered' - three 3x3 conv layers of widths stem_width//4 * 3, stem_width//4 * 6, stem_width * 2
     block_reduce_first: int, default 1
         Reduction factor for first convolution output width of residual blocks,
         1 for all archs except senets, where 2
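The channel widths each stem_type produces can be read straight off the __init__ logic later in this diff; here is a small illustrative helper (the function and its name are for illustration only, not part of the commit):

```python
def stem_channels(stem_width=64, stem_type=''):
    # Mirrors the stem_chs_1 / stem_chs_2 selection in ResNet.__init__ below.
    if 'deep' in stem_type:
        chs_1 = chs_2 = stem_width
        if 'tiered' in stem_type:
            chs_1 = 3 * (stem_width // 4)
            chs_2 = 6 * (stem_width // 4)
        return chs_1, chs_2, stem_width * 2  # three 3x3 convs
    return (64,)  # single 7x7 conv; self.inplanes stays 64

print(stem_channels(32, 'deep'))         # (32, 32, 64)  -> 'c' / 'd' variants
print(stem_channels(32, 'deep_tiered'))  # (24, 48, 64)  -> 't' variant
print(stem_channels(64, 'deep'))         # (64, 64, 128) -> 'e' / 's' variants
```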
@@ -283,12 +293,13 @@ class ResNet(nn.Module):
         Global pooling type. One of 'avg', 'max', 'avgmax', 'catavgmax'
     """
     def __init__(self, block, layers, num_classes=1000, in_chans=3, use_se=False,
-                 cardinality=1, base_width=64, stem_width=64, deep_stem=False,
+                 cardinality=1, base_width=64, stem_width=64, stem_type='',
                  block_reduce_first=1, down_kernel_size=1, avg_down=False, dilated=False,
                  norm_layer=nn.BatchNorm2d, drop_rate=0.0, global_pool='avg',
                  zero_init_last_bn=True, block_args=None):
         block_args = block_args or dict()
         self.num_classes = num_classes
+        deep_stem = 'deep' in stem_type
         self.inplanes = stem_width * 2 if deep_stem else 64
         self.cardinality = cardinality
         self.base_width = base_width
@@ -298,16 +309,20 @@ def __init__(self, block, layers, num_classes=1000, in_chans=3, use_se=False,
         super(ResNet, self).__init__()

         if deep_stem:
+            stem_chs_1 = stem_chs_2 = stem_width
+            if 'tiered' in stem_type:
+                stem_chs_1 = 3 * (stem_width // 4)
+                stem_chs_2 = 6 * (stem_width // 4)
             self.conv1 = nn.Sequential(*[
-                nn.Conv2d(in_chans, stem_width, 3, stride=2, padding=1, bias=False),
-                norm_layer(stem_width),
+                nn.Conv2d(in_chans, stem_chs_1, 3, stride=2, padding=1, bias=False),
+                norm_layer(stem_chs_1),
                 nn.ReLU(inplace=True),
-                nn.Conv2d(stem_width, stem_width, 3, stride=1, padding=1, bias=False),
-                norm_layer(stem_width),
+                nn.Conv2d(stem_chs_1, stem_chs_2, 3, stride=1, padding=1, bias=False),
+                norm_layer(stem_chs_2),
                 nn.ReLU(inplace=True),
-                nn.Conv2d(stem_width, self.inplanes, 3, stride=1, padding=1, bias=False)])
+                nn.Conv2d(stem_chs_2, self.inplanes, 3, stride=1, padding=1, bias=False)])
         else:
-            self.conv1 = nn.Conv2d(in_chans, stem_width, kernel_size=7, stride=2, padding=3, bias=False)
+            self.conv1 = nn.Conv2d(in_chans, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
         self.bn1 = norm_layer(self.inplanes)
         self.relu = nn.ReLU(inplace=True)
         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
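For the 't' variant (stem_width=32, stem_type='deep_tiered'), the branch above builds a three-conv stem with widths 24, 48, 64. A standalone sketch of that exact sequence, with a shape check (assumes torch is available; BatchNorm2d stands in for the default norm_layer):

```python
import torch
import torch.nn as nn

tiered_stem = nn.Sequential(
    nn.Conv2d(3, 24, 3, stride=2, padding=1, bias=False),  # stem_chs_1 = 3 * (32 // 4)
    nn.BatchNorm2d(24),
    nn.ReLU(inplace=True),
    nn.Conv2d(24, 48, 3, stride=1, padding=1, bias=False),  # stem_chs_2 = 6 * (32 // 4)
    nn.BatchNorm2d(48),
    nn.ReLU(inplace=True),
    nn.Conv2d(48, 64, 3, stride=1, padding=1, bias=False),  # self.inplanes = stem_width * 2
)
print(tiered_stem(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 64, 112, 112])
```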
@@ -324,7 +339,7 @@ def __init__(self, block, layers, num_classes=1000, in_chans=3, use_se=False,
         self.num_features = 512 * block.expansion
         self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)

-        last_bn_name = 'bn3' if 'Bottleneck' in block.__name__ else 'bn2'
+        last_bn_name = 'bn3' if 'Bottle' in block.__name__ else 'bn2'
         for n, m in self.named_modules():
             if isinstance(m, nn.Conv2d):
                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
@@ -440,7 +455,7 @@ def resnet26d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['resnet26d']
     model = ResNet(
-        Bottleneck, [2, 2, 2, 2], stem_width=32, deep_stem=True, avg_down=True,
+        Bottleneck, [2, 2, 2, 2], stem_width=32, stem_type='deep', avg_down=True,
         num_classes=num_classes, in_chans=in_chans, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
@@ -466,7 +481,7 @@ def resnet50d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['resnet50d']
     model = ResNet(
-        Bottleneck, [3, 4, 6, 3], stem_width=32, deep_stem=True, avg_down=True,
+        Bottleneck, [3, 4, 6, 3], stem_width=32, stem_type='deep', avg_down=True,
         num_classes=num_classes, in_chans=in_chans, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
@@ -574,7 +589,7 @@ def resnext50d_32x4d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     default_cfg = default_cfgs['resnext50d_32x4d']
     model = ResNet(
         Bottleneck, [3, 4, 6, 3], cardinality=32, base_width=4,
-        stem_width=32, deep_stem=True, avg_down=True,
+        stem_width=32, stem_type='deep', avg_down=True,
         num_classes=num_classes, in_chans=in_chans, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
@@ -854,3 +869,34 @@ def swsl_resnext101_32x16d(pretrained=True, **kwargs):
     if pretrained:
         load_pretrained(model, num_classes=kwargs.get('num_classes', 0), in_chans=kwargs.get('in_chans', 3))
     return model
+
+
+@register_model
+def seresnext26d_32x4d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
+    """Constructs a SE-ResNeXt-26-D model.
+    This is technically a 28 layer ResNet, sticking with the 'd' modifier from Gluon for now.
+    """
+    default_cfg = default_cfgs['seresnext26d_32x4d']
+    model = ResNet(
+        Bottleneck, [2, 2, 2, 2], cardinality=32, base_width=4,
+        stem_width=32, stem_type='deep', avg_down=True, use_se=True,
+        num_classes=num_classes, in_chans=in_chans, **kwargs)
+    model.default_cfg = default_cfg
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes, in_chans)
+    return model
+
+
+@register_model
+def seresnext26t_32x4d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
+    """Constructs a SE-ResNeXt-26-T model, the tiered 'deep_tiered' stem variant of the model above.
+    """
+    default_cfg = default_cfgs['seresnext26t_32x4d']
+    model = ResNet(
+        Bottleneck, [2, 2, 2, 2], cardinality=32, base_width=4,
+        stem_width=32, stem_type='deep_tiered', avg_down=True, use_se=True,
+        num_classes=num_classes, in_chans=in_chans, **kwargs)
+    model.default_cfg = default_cfg
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes, in_chans)
+    return model
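A quick sanity check on the two registrations above; the parameter counts should land close to the 16.8M figure quoted in the README table (a sketch, assuming this commit of the repo is installed):

```python
import torch
import timm

for name in ('seresnext26d_32x4d', 'seresnext26t_32x4d'):
    m = timm.create_model(name, pretrained=False)
    n_params = sum(p.numel() for p in m.parameters())
    print(f'{name}: {n_params / 1e6:.1f}M params')  # expect ~16.8M per the README table
    with torch.no_grad():
        assert m(torch.randn(1, 3, 224, 224)).shape == (1, 1000)
```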
