@@ -1260,6 +1260,25 @@ def _pe_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
12601260 }
12611261
12621262
def _dinov3_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
    """Generate a default pretrained-config dict for DINOv3 models.

    Produces the common DINOv3 defaults (256x256 input, feature-extraction
    head with ``num_classes=0``, bicubic interpolation, ImageNet mean/std,
    and the DINOv3 license tag); any keyword argument overrides the
    corresponding default.

    Args:
        url: Model weights URL.
        **kwargs: Additional configuration parameters; override defaults.

    Returns:
        Model configuration dictionary.
    """
    cfg: Dict[str, Any] = dict(
        url=url,
        num_classes=0,
        input_size=(3, 256, 256),
        pool_size=None,
        crop_pct=1.0,
        interpolation='bicubic',
        min_input_size=(3, 128, 128),
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
        first_conv='patch_embed.proj',
        classifier='head',
        license='dinov3',
    )
    # Caller-supplied overrides win, matching the original `{..., **kwargs}` splat.
    cfg.update(kwargs)
    return cfg
1281+
12631282default_cfgs = generate_default_cfgs ({
12641283
12651284 # EVA 01 CLIP fine-tuned on imagenet-1k
@@ -1614,89 +1633,43 @@ def _pe_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
16141633
16151634 # DINOv3 weights are under a specific license with redistribution terms, please see
16161635 # https://github.com/facebookresearch/dinov3/blob/main/LICENSE.md
1617- 'vit_small_patch16_dinov3_224 .lvdm_1689m' : _cfg (
1636+ 'vit_small_patch16_dinov3 .lvdm_1689m' : _dinov3_cfg (
16181637 # hf_hub_id='timm/',
1619- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1620- crop_pct = 1.0 ,
1621- num_classes = 0 ,
1622- license = 'dinov3' ,
16231638 ),
1624- 'vit_small_patch16_dinov3_qkvb_224 .lvdm_1689m' : _cfg (
1639+ 'vit_small_patch16_dinov3_qkvb .lvdm_1689m' : _dinov3_cfg (
16251640 # hf_hub_id='timm/',
1626- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1627- crop_pct = 1.0 ,
1628- num_classes = 0 ,
1629- license = 'dinov3' ,
16301641 ),
1631- 'vit_small_plus_patch16_dinov3_224 .lvdm_1689m' : _cfg (
1642+ 'vit_small_plus_patch16_dinov3 .lvdm_1689m' : _dinov3_cfg (
16321643 # hf_hub_id='timm/',
1633- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1634- crop_pct = 1.0 ,
1635- num_classes = 0 ,
1636- license = 'dinov3' ,
16371644 ),
1638- 'vit_small_plus_patch16_dinov3_qkvb_224 .lvdm_1689m' : _cfg (
1645+ 'vit_small_plus_patch16_dinov3_qkvb .lvdm_1689m' : _dinov3_cfg (
16391646 # hf_hub_id='timm/',
1640- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1641- crop_pct = 1.0 ,
1642- num_classes = 0 ,
1643- license = 'dinov3' ,
16441647 ),
1645- 'vit_base_patch16_dinov3_224 .lvdm_1689m' : _cfg (
1648+ 'vit_base_patch16_dinov3 .lvdm_1689m' : _dinov3_cfg (
16461649 #hf_hub_id='timm/',
1647- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1648- crop_pct = 1.0 ,
1649- num_classes = 0 ,
1650- license = 'dinov3' ,
16511650 ),
1652- 'vit_base_patch16_dinov3_qkvb_224 .lvdm_1689m' : _cfg (
1651+ 'vit_base_patch16_dinov3_qkvb .lvdm_1689m' : _dinov3_cfg (
16531652 #hf_hub_id='timm/',
1654- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1655- crop_pct = 1.0 ,
1656- num_classes = 0 ,
1657- license = 'dinov3' ,
16581653 ),
1659- 'vit_large_patch16_dinov3_224 .lvdm_1689m' : _cfg (
1654+ 'vit_large_patch16_dinov3 .lvdm_1689m' : _dinov3_cfg (
16601655 # hf_hub_id='timm/',
1661- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1662- crop_pct = 1.0 ,
1663- num_classes = 0 ,
1664- license = 'dinov3' ,
16651656 ),
1666- 'vit_large_patch16_dinov3_qkvb_224 .lvdm_1689m' : _cfg (
1657+ 'vit_large_patch16_dinov3_qkvb .lvdm_1689m' : _dinov3_cfg (
16671658 # hf_hub_id='timm/',
1668- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1669- crop_pct = 1.0 ,
1670- num_classes = 0 ,
1671- license = 'dinov3' ,
16721659 ),
1673- 'vit_large_patch16_dinov3_224 .sat_493m' : _cfg (
1660+ 'vit_large_patch16_dinov3 .sat_493m' : _dinov3_cfg (
16741661 # hf_hub_id='timm/',
16751662 mean = (0.430 , 0.411 , 0.296 ), std = (0.213 , 0.156 , 0.143 ),
1676- crop_pct = 1.0 ,
1677- num_classes = 0 ,
1678- license = 'dinov3' ,
16791663 ),
1680- 'vit_huge_plus_patch16_dinov3_224 .lvdm_1689m' : _cfg (
1664+ 'vit_huge_plus_patch16_dinov3 .lvdm_1689m' : _dinov3_cfg (
16811665 # hf_hub_id='timm/',
1682- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1683- crop_pct = 1.0 ,
1684- num_classes = 0 ,
1685- license = 'dinov3' ,
16861666 ),
1687- 'vit_7b_patch16_dinov3_224 .lvdm_1689m' : _cfg (
1667+ 'vit_7b_patch16_dinov3 .lvdm_1689m' : _dinov3_cfg (
16881668 # hf_hub_id='timm/',
1689- mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
1690- crop_pct = 1.0 ,
1691- num_classes = 0 ,
1692- license = 'dinov3' ,
16931669 ),
1694- 'vit_7b_patch16_dinov3_224 .sat_493m' : _cfg (
1670+ 'vit_7b_patch16_dinov3 .sat_493m' : _dinov3_cfg (
16951671 # hf_hub_id='timm/',
16961672 mean = (0.430 , 0.411 , 0.296 ), std = (0.213 , 0.156 , 0.143 ),
1697- crop_pct = 1.0 ,
1698- num_classes = 0 ,
1699- license = 'dinov3' ,
17001673 ),
17011674
17021675})
@@ -2640,9 +2613,10 @@ def vit_large_patch16_rope_mixed_ape_224(pretrained: bool = False, **kwargs) ->
26402613
26412614
26422615@register_model
2643- def vit_small_patch16_dinov3_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2616+ def vit_small_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
26442617 model_args = dict (
26452618 patch_size = 16 ,
2619+ dynamic_img_size = True ,
26462620 embed_dim = 384 ,
26472621 depth = 12 ,
26482622 num_heads = 6 ,
@@ -2658,14 +2632,15 @@ def vit_small_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
26582632 use_fc_norm = False ,
26592633 norm_layer = partial (LayerNorm , eps = 1e-5 ),
26602634 )
2661- model = _create_eva ('vit_small_patch16_dinov3_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2635+ model = _create_eva ('vit_small_patch16_dinov3 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
26622636 return model
26632637
26642638
26652639@register_model
2666- def vit_small_patch16_dinov3_qkvb_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2640+ def vit_small_patch16_dinov3_qkvb (pretrained : bool = False , ** kwargs ) -> Eva :
26672641 model_args = dict (
26682642 patch_size = 16 ,
2643+ dynamic_img_size = True ,
26692644 embed_dim = 384 ,
26702645 depth = 12 ,
26712646 num_heads = 6 ,
@@ -2681,14 +2656,15 @@ def vit_small_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva
26812656 use_fc_norm = False ,
26822657 norm_layer = partial (LayerNorm , eps = 1e-5 ),
26832658 )
2684- model = _create_eva ('vit_small_patch16_dinov3_qkvb_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2659+ model = _create_eva ('vit_small_patch16_dinov3_qkvb ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
26852660 return model
26862661
26872662
26882663@register_model
2689- def vit_small_plus_patch16_dinov3_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2664+ def vit_small_plus_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
26902665 model_args = dict (
26912666 patch_size = 16 ,
2667+ dynamic_img_size = True ,
26922668 embed_dim = 384 ,
26932669 depth = 12 ,
26942670 num_heads = 6 ,
@@ -2706,14 +2682,15 @@ def vit_small_plus_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva
27062682 use_fc_norm = False ,
27072683 norm_layer = partial (LayerNorm , eps = 1e-5 ),
27082684 )
2709- model = _create_eva ('vit_small_plus_patch16_dinov3_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2685+ model = _create_eva ('vit_small_plus_patch16_dinov3 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
27102686 return model
27112687
27122688
27132689@register_model
2714- def vit_small_plus_patch16_dinov3_qkvb_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2690+ def vit_small_plus_patch16_dinov3_qkvb (pretrained : bool = False , ** kwargs ) -> Eva :
27152691 model_args = dict (
27162692 patch_size = 16 ,
2693+ dynamic_img_size = True ,
27172694 embed_dim = 384 ,
27182695 depth = 12 ,
27192696 num_heads = 6 ,
@@ -2731,14 +2708,15 @@ def vit_small_plus_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -
27312708 use_fc_norm = False ,
27322709 norm_layer = partial (LayerNorm , eps = 1e-5 ),
27332710 )
2734- model = _create_eva ('vit_small_plus_patch16_dinov3_qkvb_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2711+ model = _create_eva ('vit_small_plus_patch16_dinov3_qkvb ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
27352712 return model
27362713
27372714
27382715@register_model
2739- def vit_base_patch16_dinov3_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2716+ def vit_base_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
27402717 model_args = dict (
27412718 patch_size = 16 ,
2719+ dynamic_img_size = True ,
27422720 embed_dim = 768 ,
27432721 depth = 12 ,
27442722 num_heads = 12 ,
@@ -2754,15 +2732,16 @@ def vit_base_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
27542732 use_fc_norm = False ,
27552733 norm_layer = partial (LayerNorm , eps = 1e-5 ),
27562734 )
2757- model = _create_eva ('vit_base_patch16_dinov3_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2735+ model = _create_eva ('vit_base_patch16_dinov3 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
27582736 return model
27592737
27602738
27612739@register_model
2762- def vit_base_patch16_dinov3_qkvb_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2740+ def vit_base_patch16_dinov3_qkvb (pretrained : bool = False , ** kwargs ) -> Eva :
27632741 # DINOv3 Base variant w/ qkv_bias enabled (zero'd in weights)
27642742 model_args = dict (
27652743 patch_size = 16 ,
2744+ dynamic_img_size = True ,
27662745 embed_dim = 768 ,
27672746 depth = 12 ,
27682747 num_heads = 12 ,
@@ -2778,14 +2757,15 @@ def vit_base_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva:
27782757 use_fc_norm = False ,
27792758 norm_layer = partial (LayerNorm , eps = 1e-5 ),
27802759 )
2781- model = _create_eva ('vit_base_patch16_dinov3_qkvb_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2760+ model = _create_eva ('vit_base_patch16_dinov3_qkvb ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
27822761 return model
27832762
27842763
27852764@register_model
2786- def vit_large_patch16_dinov3_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2765+ def vit_large_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
27872766 model_args = dict (
27882767 patch_size = 16 ,
2768+ dynamic_img_size = True ,
27892769 embed_dim = 1024 ,
27902770 depth = 24 ,
27912771 num_heads = 16 ,
@@ -2801,14 +2781,15 @@ def vit_large_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
28012781 use_fc_norm = False ,
28022782 norm_layer = partial (LayerNorm , eps = 1e-5 ),
28032783 )
2804- model = _create_eva ('vit_large_patch16_dinov3_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2784+ model = _create_eva ('vit_large_patch16_dinov3 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
28052785 return model
28062786
28072787
28082788@register_model
2809- def vit_large_patch16_dinov3_qkvb_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2789+ def vit_large_patch16_dinov3_qkvb (pretrained : bool = False , ** kwargs ) -> Eva :
28102790 model_args = dict (
28112791 patch_size = 16 ,
2792+ dynamic_img_size = True ,
28122793 embed_dim = 768 ,
28132794 depth = 24 ,
28142795 num_heads = 16 ,
@@ -2824,14 +2805,15 @@ def vit_large_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva
28242805 use_fc_norm = False ,
28252806 norm_layer = partial (LayerNorm , eps = 1e-5 ),
28262807 )
2827- model = _create_eva ('vit_large_patch16_dinov3_qkvb_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2808+ model = _create_eva ('vit_large_patch16_dinov3_qkvb ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
28282809 return model
28292810
28302811
28312812@register_model
2832- def vit_huge_plus_patch16_dinov3_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2813+ def vit_huge_plus_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
28332814 model_args = dict (
28342815 patch_size = 16 ,
2816+ dynamic_img_size = True ,
28352817 embed_dim = 1280 ,
28362818 depth = 32 ,
28372819 num_heads = 20 ,
@@ -2850,14 +2832,15 @@ def vit_huge_plus_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
28502832 norm_layer = partial (LayerNorm , eps = 1e-5 ),
28512833 )
28522834
2853- model = _create_eva ('vit_huge_plus_patch16_dinov3_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2835+ model = _create_eva ('vit_huge_plus_patch16_dinov3 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
28542836 return model
28552837
28562838
28572839@register_model
2858- def vit_7b_patch16_dinov3_224 (pretrained : bool = False , ** kwargs ) -> Eva :
2840+ def vit_7b_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
28592841 model_args = dict (
28602842 patch_size = 16 ,
2843+ dynamic_img_size = True ,
28612844 embed_dim = 4096 ,
28622845 depth = 40 ,
28632846 num_heads = 32 ,
@@ -2877,5 +2860,5 @@ def vit_7b_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
28772860 norm_layer = partial (LayerNorm , eps = 1e-5 ),
28782861 )
28792862
2880- model = _create_eva ('vit_7b_patch16_dinov3_224 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
2863+ model = _create_eva ('vit_7b_patch16_dinov3 ' , pretrained = pretrained , ** dict (model_args , ** kwargs ))
28812864 return model
0 commit comments