@@ -1,4 +1,5 @@
 import numpy as np
+import tensorflow as tf
 
 from keras.src import backend
 from keras.src import ops
@@ -10,58 +11,59 @@
 
 class MuonTest(testing.TestCase):
     def test_config(self):
-        optimizer = Muon(
-            learning_rate=0.5,
-            epsilon=1e-5,
-        )
+        optimizer = Muon(learning_rate=0.5, epsilon=1e-5)
         self.run_class_serialization_test(optimizer)
 
     def test_Newton_Schulz(self):
         optimizer = Muon()
         tensor_input = ops.array([[0.2499, 0.9105], [0.2655, 0.8824]])
-        except_output = ops.array([[-0.4422, 0.6457], [0.7285, 0.2968]])
+        expected_output = ops.array([[-0.4422, 0.6457], [0.7285, 0.2968]])
         output = optimizer.zeropower_via_newtonschulz5(tensor_input, 5)
-        self.assertAllClose(output, except_output, rtol=1e-3, atol=1e-3)
+        self.assertAllClose(output, expected_output, rtol=1e-3, atol=1e-3)
 
     def test_adamw_single_step(self):
         optimizer = Muon()
         grads = ops.array([1.0, 6.0, 7.0, 2.0])
-        vars = backend.Variable([1.0, 2.0, 3.0, 4.0], name="test_vars")
-        optimizer.build([vars])
-        optimizer._adamw_update_step(grads, vars, 0.5)
-        self.assertAllClose(vars, [0.5, 1.5, 2.5, 3.5], rtol=1e-4, atol=1e-4)
+        var = backend.Variable([1.0, 2.0, 3.0, 4.0], name="test_vars")
+        optimizer.build([var])
+        optimizer._adamw_update_step(grads, var, 0.5)
+        self.assertAllClose(var, [0.5, 1.5, 2.5, 3.5], rtol=1e-4, atol=1e-4)
 
-    def test_should_use_adamw(self):
-        vars = backend.Variable([[1.0, 2.0], [3.0, 4.0]])
-        optimizer = Muon(exclude_layers=["var"])
-        self.assertAllClose(
-            True,
-            optimizer._should_use_adamw(vars),
+    def test_should_use_adamw_excluded_layer(self):
+        """Ensure exclude_layers keyword works and no .path is accessed."""
+        optimizer = Muon(exclude_layers=["dense"])
+        dummy_var = backend.Variable(
+            [[1.0, 2.0], [3.0, 4.0]], name="dense_kernel_0"
         )
-        embeding = Embedding(2, 2)
-        embeding.build()
-        self.assertAllClose(
-            True,
-            optimizer._should_use_adamw(embeding.weights[0]),
-        )
-        vars = backend.Variable([[1.0, 2.0], [3.0, 4.0]])
+        result = optimizer._should_use_adamw(dummy_var)
+        self.assertTrue(result)
+
+    def test_should_use_adamw_embedding(self):
+        """Embedding layer should use AdamW when exclude_embeddings=True."""
+        embedding = Embedding(2, 2)
+        embedding.build()
+        optimizer = Muon(exclude_embeddings=True)
+        result = optimizer._should_use_adamw(embedding.weights[0])
+        self.assertTrue(result)
+
+    def test_should_use_adamw_dimension_rule(self):
+        """Variables with dimensions not between 2–4 use AdamW."""
+        v_1d = backend.Variable([1.0, 2.0], name="v1d")
+        v_5d = backend.Variable(np.zeros((2, 2, 2, 2, 2)), name="v5d")
         optimizer = Muon()
-        self.assertAllClose(
-            False,
-            optimizer._should_use_adamw(vars),
-        )
+        self.assertTrue(optimizer._should_use_adamw(v_1d))
+        self.assertTrue(optimizer._should_use_adamw(v_5d))
+
+    def test_should_use_adamw_dense_layer(self):
+        """2D dense layer weights should use Muon (False)."""
         dense = Dense(2)
         dense.build([None, 2])
-        self.assertAllClose(
-            False,
-            optimizer._should_use_adamw(dense.weights[0]),
-        )
+        optimizer = Muon()
+        result = optimizer._should_use_adamw(dense.weights[0])
+        self.assertFalse(result)
 
     def test_muon_single_step(self):
-        optimizer = Muon(
-            learning_rate=0.5,
-            weight_decay=0,
-        )
+        optimizer = Muon(learning_rate=0.5, weight_decay=0)
         grads = ops.array([[1.0, 6.0], [7.0, 2.0]])
         vars = backend.Variable([[1.0, 2.0], [3.0, 4.0]])
         optimizer.build([vars])
@@ -81,3 +83,15 @@ def test_clip_value(self):
         grad = [np.array([100.0, 100.0])]
         clipped_grad = optimizer._clip_gradients(grad)
         self.assertAllClose(clipped_grad[0], [1.0, 1.0])
+
+    def test_no_path_attribute_error(self):
+        """Ensure compatibility with TF 2.16+ where
+        ResourceVariable has no .path."""
+        optimizer = Muon()
+        var = tf.Variable([1.0, 2.0], name="test_var")
+        # Force-run method that caused AttributeError in issue #21793
+        try:
+            result = optimizer._should_use_adamw(var)
+            self.assertIn(result, [True, False])
+        except AttributeError as e:
+            self.fail(f"Unexpected AttributeError: {e}")
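For reference, below is a minimal standalone sketch of the selection rule these tests exercise, assuming rank checks plus name-based keyword matching with a fallback from `.path` to `.name`. The helper name, defaults, and matching details are hypothetical illustrations, not the actual Keras `Muon._should_use_adamw` implementation.

    def should_use_adamw(variable, exclude_layers=(), exclude_embeddings=True):
        # Hypothetical sketch, not the Keras implementation: Muon handles
        # only 2D-4D tensors, so other ranks fall back to AdamW.
        rank = len(variable.shape)
        if rank < 2 or rank > 4:
            return True
        # Plain tf.Variable has no `.path` attribute (TF 2.16+), so fall
        # back to `.name` before keyword matching (assumed behavior).
        name = getattr(variable, "path", None) or variable.name
        if exclude_embeddings and "embedding" in name.lower():
            return True
        return any(keyword in name for keyword in exclude_layers)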