Skip to content

Commit 60b62ca

Browse files
committed
recovery: reduce spam of "Finish bucket recovery step" logs
Before this patch "Finish bucket recovery step ..." logs were printed at the end of recovery even if no buckets were successfully recovered. It led to unnecessary log records. This patch fixes the issue by adding an additional check for the number of recovered buckets. Part of #212 NO_DOC=bugfix
1 parent f72bb21 commit 60b62ca

File tree

2 files changed

+95
-1
lines changed

2 files changed

+95
-1
lines changed

test/storage-luatest/storage_1_1_1_test.lua

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,97 @@ test_group.test_manual_bucket_send_doubled_buckets = function(g)
101101
ilt.assert_equals(box.space._bucket:get(bid), nil)
102102
end, {bid})
103103
end
104+
105+
local rebalancer_recovery_group = t.group('rebalancer-recovery-logging')
106+
107+
local function start_bucket_move(src_storage, dest_storage, bucket_id)
108+
src_storage:exec(function(bucket_id, replicaset_id)
109+
ivshard.storage.bucket_send(bucket_id, replicaset_id)
110+
end, {bucket_id, dest_storage:replicaset_uuid()})
111+
112+
dest_storage:exec(function(bucket_id)
113+
t.helpers.retrying({timeout = 10}, function()
114+
t.assert(box.space._bucket:select(bucket_id))
115+
end)
116+
end, {bucket_id})
117+
end
118+
119+
local function wait_for_bucket_is_transferred(src_storage, dest_storage,
120+
bucket_id)
121+
src_storage:exec(function(bucket_id)
122+
t.helpers.retrying({}, function()
123+
t.assert_equals(box.space._bucket:select(bucket_id), {})
124+
end)
125+
end, {bucket_id})
126+
dest_storage:exec(function(bucket_id)
127+
t.helpers.retrying({}, function()
128+
t.assert_equals(box.space._bucket:select(bucket_id)[1].status,
129+
'active')
130+
end)
131+
end, {bucket_id})
132+
end
133+
134+
rebalancer_recovery_group.before_all(function(g)
135+
global_cfg = vtest.config_new(cfg_template)
136+
vtest.cluster_new(g, global_cfg)
137+
g.router = vtest.router_new(g, 'router', global_cfg)
138+
vtest.cluster_bootstrap(g, global_cfg)
139+
vtest.cluster_wait_vclock_all(g)
140+
141+
vtest.cluster_exec_each_master(g, function()
142+
box.schema.create_space('test_space')
143+
box.space.test_space:format({
144+
{name = 'pk', type = 'unsigned'},
145+
{name = 'bucket_id', type = 'unsigned'},
146+
})
147+
box.space.test_space:create_index('primary', {parts = {'pk'}})
148+
box.space.test_space:create_index(
149+
'bucket_id', {parts = {'bucket_id'}, unique = false})
150+
end)
151+
end)
152+
153+
rebalancer_recovery_group.after_all(function(g)
154+
g.cluster:drop()
155+
end)
156+
157+
--
158+
-- Improve logging of rebalancer and recovery (gh-212).
159+
--
160+
rebalancer_recovery_group.test_no_logs_while_unsuccess_recovery = function(g)
161+
g.replica_2_a:exec(function()
162+
ivshard.storage.internal.errinj.ERRINJ_RECEIVE_PARTIALLY = true
163+
rawset(_G, 'old_call', ivshard.storage._call)
164+
ivshard.storage._call = function(service_name, ...)
165+
if service_name == 'recovery_bucket_stat' then
166+
return error('TimedOut')
167+
end
168+
return _G.old_call(service_name, ...)
169+
end
170+
end)
171+
local hanged_bucket_id_1 = vtest.storage_first_bucket(g.replica_1_a)
172+
start_bucket_move(g.replica_1_a, g.replica_2_a, hanged_bucket_id_1)
173+
local hanged_bucket_id_2 = vtest.storage_first_bucket(g.replica_1_a)
174+
start_bucket_move(g.replica_1_a, g.replica_2_a, hanged_bucket_id_2)
175+
t.helpers.retrying({}, function()
176+
g.replica_1_a:exec(function() ivshard.storage.recovery_wakeup() end)
177+
t.assert(g.replica_1_a:grep_log('Error during recovery of bucket 1'))
178+
end)
179+
t.assert_not(g.replica_1_a:grep_log('Finish bucket recovery step, 0'))
180+
g.replica_2_a:exec(function()
181+
ivshard.storage.internal.errinj.ERRINJ_RECEIVE_PARTIALLY = false
182+
ivshard.storage._call = _G.old_call
183+
end)
184+
t.helpers.retrying({timeout = 60}, function()
185+
g.replica_2_a:exec(function()
186+
ivshard.storage.garbage_collector_wakeup()
187+
ivshard.storage.recovery_wakeup()
188+
end)
189+
g.replica_1_a:exec(function() ivshard.storage.recovery_wakeup() end)
190+
t.assert(g.replica_1_a:grep_log('Finish bucket recovery step, 2 ' ..
191+
'sending buckets are recovered among'))
192+
end)
193+
wait_for_bucket_is_transferred(g.replica_2_a, g.replica_1_a,
194+
hanged_bucket_id_1)
195+
wait_for_bucket_is_transferred(g.replica_2_a, g.replica_1_a,
196+
hanged_bucket_id_2)
197+
end

vshard/storage/init.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1003,7 +1003,7 @@ local function recovery_step_by_type(type)
10031003
is_step_empty = false
10041004
::continue::
10051005
end
1006-
if not is_step_empty then
1006+
if recovered > 0 then
10071007
log.info('Finish bucket recovery step, %d %s buckets are recovered '..
10081008
'among %d', recovered, type, total)
10091009
end

0 commit comments

Comments
 (0)