@@ -1466,6 +1466,7 @@ def _cfg(url='', **kwargs):
14661466 url = 'https://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.16-448px-300e.pth.tar' ,
14671467 # hf_hub_id='timm/',
14681468 license = 'cc-by-nc-4.0' ,
1469+ input_size = (3 , 448 , 448 ), crop_pct = 1.0 ,
14691470 mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD , num_classes = 0 ),
14701471 'vit_gigantic_patch16_224_ijepa.in22k' : _cfg (
14711472 url = 'https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.g.16-600e.pth.tar' ,
@@ -2066,22 +2067,28 @@ def vit_giant_patch14_dinov2(pretrained=False, **kwargs) -> VisionTransformer:
20662067 'vit_giant_patch14_dinov2' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
20672068 return model
20682069
2070+
@register_model
def vit_huge_patch14_224_ijepa(pretrained=False, **kwargs) -> VisionTransformer:
    """ ViT-Huge model (ViT-H/14) from `I-JEPA` - https://arxiv.org/abs/2301.08243

    Builds the variant with 14x14 patches, embedding width 1280, depth 32 and
    16 attention heads, with no class token and average global pooling.

    Args:
        pretrained: Forwarded unchanged to `_create_vision_transformer`.
        **kwargs: Extra model arguments; any key given here overrides the
            architecture default of the same name listed below.
    """
    arch_defaults = {
        'patch_size': 14,
        'embed_dim': 1280,
        'depth': 32,
        'num_heads': 16,
        'class_token': False,
        'global_pool': 'avg',
    }
    # Caller overrides are unpacked last so they win over the defaults.
    merged_args = {**arch_defaults, **kwargs}
    return _create_vision_transformer(
        'vit_huge_patch14_224_ijepa',
        pretrained=pretrained,
        **merged_args,
    )
20762079
2080+
@register_model
def vit_huge_patch16_448_ijepa(pretrained=False, **kwargs) -> VisionTransformer:
    """ ViT-Huge model (ViT-H/16) from `I-JEPA` - https://arxiv.org/abs/2301.08243

    Builds the variant with 16x16 patches and a default `img_size` of 448,
    embedding width 1280, depth 32 and 16 attention heads, with no class token
    and average global pooling.

    Args:
        pretrained: Forwarded unchanged to `_create_vision_transformer`.
        **kwargs: Extra model arguments; any key given here overrides the
            architecture default of the same name listed below.
    """
    arch_defaults = {
        'patch_size': 16,
        'embed_dim': 1280,
        'depth': 32,
        'num_heads': 16,
        'class_token': False,
        'global_pool': 'avg',
        'img_size': 448,
    }
    # Caller overrides are unpacked last so they win over the defaults.
    merged_args = {**arch_defaults, **kwargs}
    return _create_vision_transformer(
        'vit_huge_patch16_448_ijepa',
        pretrained=pretrained,
        **merged_args,
    )
20842090
2091+
20852092@register_model
20862093def vit_gigantic_patch16_224_ijepa (pretrained = False , ** kwargs ) -> VisionTransformer :
20872094 """ ViT-Gigantic (big-G) model (ViT-G/16) from `I-JEPA` - https://arxiv.org/abs/2301.08243
@@ -2091,6 +2098,7 @@ def vit_gigantic_patch16_224_ijepa(pretrained=False, **kwargs) -> VisionTransfor
20912098 'vit_gigantic_patch16_224_ijepa' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
20922099 return model
20932100
2101+
20942102register_model_deprecations (__name__ , {
20952103 'vit_tiny_patch16_224_in21k' : 'vit_tiny_patch16_224.augreg_in21k' ,
20962104 'vit_small_patch32_224_in21k' : 'vit_small_patch32_224.augreg_in21k' ,
0 commit comments