Skip to content

Commit a9d0615

Browse files
committed
Fix I-JEPA ViT pretrained config for the 448px model (add input_size=(3, 448, 448) and crop_pct=1.0), plus minor formatting fixes
1 parent e590ec5 commit a9d0615

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

timm/models/vision_transformer.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,6 +1466,7 @@ def _cfg(url='', **kwargs):
14661466
url='https://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.16-448px-300e.pth.tar',
14671467
# hf_hub_id='timm/',
14681468
license='cc-by-nc-4.0',
1469+
input_size=(3, 448, 448), crop_pct=1.0,
14691470
mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, num_classes=0),
14701471
'vit_gigantic_patch16_224_ijepa.in22k': _cfg(
14711472
url='https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.g.16-600e.pth.tar',
@@ -2066,22 +2067,28 @@ def vit_giant_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
20662067
'vit_giant_patch14_dinov2', pretrained=pretrained, **dict(model_args, **kwargs))
20672068
return model
20682069

2070+
20692071
@register_model
20702072
def vit_huge_patch14_224_ijepa(pretrained=False, **kwargs) -> VisionTransformer:
20712073
""" ViT-Huge model (ViT-H/14) from `I-JEPA` - https://arxiv.org/abs/2301.08243
20722074
"""
20732075
model_args = dict(patch_size=14, embed_dim=1280, depth=32, num_heads=16, class_token=False, global_pool='avg')
2074-
model = _create_vision_transformer('vit_huge_patch14_224_ijepa', pretrained=pretrained, **dict(model_args, **kwargs))
2076+
model = _create_vision_transformer(
2077+
'vit_huge_patch14_224_ijepa', pretrained=pretrained, **dict(model_args, **kwargs))
20752078
return model
20762079

2080+
20772081
@register_model
20782082
def vit_huge_patch16_448_ijepa(pretrained=False, **kwargs) -> VisionTransformer:
20792083
""" ViT-Huge model (ViT-H/16) from `I-JEPA` - https://arxiv.org/abs/2301.08243
20802084
"""
2081-
model_args = dict(patch_size=16, embed_dim=1280, depth=32, num_heads=16, class_token=False, global_pool='avg', img_size=448)
2082-
model = _create_vision_transformer('vit_huge_patch16_448_ijepa', pretrained=pretrained, **dict(model_args, **kwargs))
2085+
model_args = dict(
2086+
patch_size=16, embed_dim=1280, depth=32, num_heads=16, class_token=False, global_pool='avg', img_size=448)
2087+
model = _create_vision_transformer(
2088+
'vit_huge_patch16_448_ijepa', pretrained=pretrained, **dict(model_args, **kwargs))
20832089
return model
20842090

2091+
20852092
@register_model
20862093
def vit_gigantic_patch16_224_ijepa(pretrained=False, **kwargs) -> VisionTransformer:
20872094
""" ViT-Gigantic (big-G) model (ViT-G/16) from `I-JEPA - https://arxiv.org/abs/2301.08243
@@ -2091,6 +2098,7 @@ def vit_gigantic_patch16_224_ijepa(pretrained=False, **kwargs) -> VisionTransfor
20912098
'vit_gigantic_patch16_224_ijepa', pretrained=pretrained, **dict(model_args, **kwargs))
20922099
return model
20932100

2101+
20942102
register_model_deprecations(__name__, {
20952103
'vit_tiny_patch16_224_in21k': 'vit_tiny_patch16_224.augreg_in21k',
20962104
'vit_small_patch32_224_in21k': 'vit_small_patch32_224.augreg_in21k',

0 commit comments

Comments
 (0)