
Commit f71c3d1

Merge branch 'main' into grad_checkpointing
2 parents: cd1542a + 96256aa

File tree: 4 files changed (+342 / -44 lines)


tests/test_optim.py

Lines changed: 2 additions & 2 deletions

@@ -298,7 +298,7 @@ def test_optim_factory(optimizer):
     assert isinstance(opt_info, OptimInfo)

     lr = (1e-2,) * 4
-    if optimizer in ('mars', 'nadam', 'claprop', 'crmsproptf', 'cadafactorbv', 'csgdw', 'clamb'):
+    if optimizer in ('mars', 'nadam', 'claprop', 'crmsproptf', 'cadafactorbv', 'csgdw', 'csgdc', 'clamb'):
         lr = (1e-3,) * 4
     elif optimizer in ('cmars',):
         lr = (1e-4,) * 4
@@ -378,7 +378,7 @@ def test_sgd(optimizer):
     _test_model(optimizer, dict(lr=1e-3))


-@pytest.mark.parametrize('optimizer', ['adamw', 'adam', 'nadam', 'adamax', 'nadamw'])
+@pytest.mark.parametrize('optimizer', ['adamw', 'adam', 'nadam', 'adamax', 'nadamw', 'adamwlegacy', 'adamc'])
 def test_adam(optimizer):
     _test_rosenbrock(
         lambda params: create_optimizer_v2(params, optimizer, lr=5e-2)
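
For context, a minimal sketch of how one of the newly covered optimizer names can be exercised through create_optimizer_v2 (a real timm factory). The toy model, data, and hyperparameters below are illustrative stand-ins for the test harness, and 'adamc' is assumed to be registered in this commit's optimizer factory, as the parametrization above implies:

import torch
import torch.nn as nn
from timm.optim import create_optimizer_v2

# Illustrative stand-in for the tests' toy model.
model = nn.Linear(10, 2)
# 'adamc' is one of the names added to the parametrization above.
opt = create_optimizer_v2(model.parameters(), 'adamc', lr=5e-2)

# One optimization step on dummy data.
x, y = torch.randn(8, 10), torch.randn(8, 2)
loss = nn.functional.mse_loss(model(x), y)
loss.backward()
opt.step()
opt.zero_grad()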

timm/layers/attention.py

Lines changed: 2 additions & 2 deletions

@@ -154,8 +154,8 @@ def __init__(
         self.k_proj = nn.Linear(dim, attn_dim, bias=qkv_bias)
         self.v_proj = nn.Linear(dim, attn_dim, bias=qkv_bias)

-        self.q_norm = norm_layer(self.head_dim) if qk_norm else nn.Identity()
-        self.k_norm = norm_layer(self.head_dim) if qk_norm else nn.Identity()
+        self.q_norm = norm_layer(head_dim) if qk_norm else nn.Identity()
+        self.k_norm = norm_layer(head_dim) if qk_norm else nn.Identity()
         self.attn_drop = nn.Dropout(attn_drop)
         self.norm = norm_layer(attn_dim) if scale_norm else nn.Identity()
         self.proj = nn.Linear(attn_dim, dim)
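
The one-line fix builds the q/k norms from the constructor's head_dim argument rather than self.head_dim. Presumably the two can disagree when an explicit per-head width is passed (so attn_dim = head_dim * num_heads differs from dim), and the norm must match the width the projections actually produce. A minimal sketch of the qk-norm pattern, an illustrative module rather than the timm class itself (requires PyTorch 2.x for scaled_dot_product_attention):

import torch
import torch.nn as nn
import torch.nn.functional as F

class QKNormAttention(nn.Module):
    # Illustrative module; names (head_dim, attn_dim, q_norm, k_norm)
    # mirror the diff above, but this is not the timm class.
    def __init__(self, dim, num_heads=8, head_dim=None, qk_norm=True, norm_layer=nn.LayerNorm):
        super().__init__()
        # An explicit head size may differ from dim // num_heads.
        head_dim = head_dim or dim // num_heads
        attn_dim = head_dim * num_heads
        self.num_heads = num_heads
        self.head_dim = head_dim
        self.q_proj = nn.Linear(dim, attn_dim)
        self.k_proj = nn.Linear(dim, attn_dim)
        self.v_proj = nn.Linear(dim, attn_dim)
        # Norm over the per-head channel width the projections produce,
        # i.e. the local head_dim, as in the fixed lines above.
        self.q_norm = norm_layer(head_dim) if qk_norm else nn.Identity()
        self.k_norm = norm_layer(head_dim) if qk_norm else nn.Identity()
        self.proj = nn.Linear(attn_dim, dim)

    def forward(self, x):
        B, N, _ = x.shape
        # (B, N, attn_dim) -> (B, num_heads, N, head_dim)
        q = self.q_proj(x).view(B, N, self.num_heads, self.head_dim).transpose(1, 2)
        k = self.k_proj(x).view(B, N, self.num_heads, self.head_dim).transpose(1, 2)
        v = self.v_proj(x).view(B, N, self.num_heads, self.head_dim).transpose(1, 2)
        # Normalize queries and keys per head before the dot product.
        q, k = self.q_norm(q), self.k_norm(k)
        x = F.scaled_dot_product_attention(q, k, v)
        return self.proj(x.transpose(1, 2).reshape(B, N, -1))

For example, QKNormAttention(64, num_heads=4, head_dim=32)(torch.randn(2, 16, 64)) returns a (2, 16, 64) tensor; here the q/k norms must be built over 32 channels, not 64 // 4 = 16, which is the mismatch the fix guards against.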
