@@ -41,6 +41,7 @@ local cfg_template = {
4141 },
4242 bucket_count = 15 ,
4343 replication_timeout = 0.1 ,
44+ rebalancer_mode = ' auto' ,
4445}
4546local global_cfg
4647
@@ -101,3 +102,91 @@ test_group.test_manual_bucket_send_doubled_buckets = function(g)
101102 ilt .assert_equals (box .space ._bucket :get (bid ), nil )
102103 end , {bid })
103104end
105+
106+ local rebalancer_recovery_group = t .group (' rebalancer-recovery-logging' )
107+
108+ local function get_first_storage_bucket_id (storage )
109+ return storage :exec (function ()
110+ return box .space ._bucket :select ()[1 ].id
111+ end )
112+ end
113+
114+ local function move_bucket (src_storage , dest_storage , bucket_id )
115+ src_storage :exec (function (bucket_id , replicaset_id )
116+ ivshard .storage .bucket_send (bucket_id , replicaset_id )
117+ end , {bucket_id , dest_storage :replicaset_uuid ()})
118+
119+ dest_storage :exec (function (bucket_id )
120+ t .helpers .retrying ({timeout = 10 }, function ()
121+ t .assert (box .space ._bucket :select (bucket_id ))
122+ end )
123+ end , {bucket_id })
124+ end
125+
126+ rebalancer_recovery_group .before_all (function (g )
127+ global_cfg = vtest .config_new (cfg_template )
128+ vtest .cluster_new (g , global_cfg )
129+ g .router = vtest .router_new (g , ' router' , global_cfg )
130+ vtest .cluster_bootstrap (g , global_cfg )
131+ vtest .cluster_wait_vclock_all (g )
132+
133+ local first_buckets = vtest .cluster_exec_each_master (g , function ()
134+ rawset (_G , ' create_test_space' , function ()
135+ box .schema .create_space (' test_space' )
136+ box .space .test_space :format ({
137+ {name = ' pk' , type = ' unsigned' },
138+ {name = ' bucket_id' , type = ' unsigned' },
139+ })
140+ box .space .test_space :create_index (' primary' , {parts = {' pk' }})
141+ box .space .test_space :create_index (
142+ ' bucket_id' , {parts = {' bucket_id' }, unique = false })
143+ end )
144+ return _G .get_first_bucket ()
145+ end )
146+ for _ , bucket_id in pairs (first_buckets ) do
147+ g .router :exec (function (storage_bucket )
148+ ivshard .router .call (storage_bucket , ' write' , ' create_test_space' )
149+ end , {bucket_id })
150+ end
151+ g .buckets_not_active_log = ' Some buckets are not active'
152+ g .buckets_not_active_pattern = " %d+-%d+-%d+ %d+:%d+:%d+.%d+ .* " ..
153+ g .buckets_not_active_log
154+ g .rebalancer_wait_interval = 0.01
155+ end )
156+
157+ rebalancer_recovery_group .after_all (function (g )
158+ g .cluster :drop ()
159+ end )
160+
161+ --
162+ -- Improve logging of rebalancer and recovery (gh-212).
163+ --
164+ rebalancer_recovery_group .test_no_logs_while_unsuccess_recovery = function (g )
165+ g .replica_2_a :exec (function ()
166+ ivshard .storage .internal .errinj .ERRINJ_RECEIVE_PARTIALLY = true
167+ rawset (_G , ' old_call' , ivshard .storage ._call )
168+ ivshard .storage ._call = function (service_name , ...)
169+ if service_name == ' recovery_bucket_stat' then
170+ return error (' TimedOut' )
171+ end
172+ return _G .old_call (service_name , ... )
173+ end
174+ end )
175+ local hanged_bucket_id = get_first_storage_bucket_id (g .replica_1_a )
176+ move_bucket (g .replica_1_a , g .replica_2_a , hanged_bucket_id )
177+ t .helpers .retrying ({}, function ()
178+ t .assert (g .replica_1_a :grep_log (' Error during recovery of bucket 1' ))
179+ end )
180+ t .assert_not (g .replica_1_a :grep_log (' Finish bucket recovery step, 0' ))
181+ g .replica_2_a :exec (function ()
182+ ivshard .storage .internal .errinj .ERRINJ_RECEIVE_PARTIALLY = false
183+ ivshard .storage ._call = _G .old_call
184+ ivshard .storage .recovery_wakeup ()
185+ end )
186+ g .replica_1_a :exec (function (bucket_id )
187+ t .helpers .retrying ({}, function ()
188+ t .assert_equals (box .space ._bucket :select (bucket_id )[1 ].status ,
189+ ' active' )
190+ end )
191+ end , {hanged_bucket_id })
192+ end
0 commit comments