Skip to content

Commit 2abe583

Browse files
committed
recovery: reduce spam in logs
Before this patch "Finish bucket recovery step ..." logs were printed at the end of recovery even if no buckets were successfully recovered. It led to unnecessary log records. This patch fixes the issue by adding an additional check for the number of recovered buckets. Closes #212 NO_DOC=bugfix
1 parent 3a5ddef commit 2abe583

File tree

2 files changed

+90
-1
lines changed

2 files changed

+90
-1
lines changed

test/storage-luatest/storage_1_1_1_test.lua

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ local cfg_template = {
4141
},
4242
bucket_count = 15,
4343
replication_timeout = 0.1,
44+
rebalancer_mode = 'auto',
4445
}
4546
local global_cfg
4647

@@ -101,3 +102,91 @@ test_group.test_manual_bucket_send_doubled_buckets = function(g)
101102
ilt.assert_equals(box.space._bucket:get(bid), nil)
102103
end, {bid})
103104
end
105+
106+
local rebalancer_recovery_group = t.group('rebalancer-recovery-logging')
107+
108+
local function get_first_storage_bucket_id(storage)
109+
return storage:exec(function()
110+
return box.space._bucket:select()[1].id
111+
end)
112+
end
113+
114+
local function move_bucket(src_storage, dest_storage, bucket_id)
115+
src_storage:exec(function(bucket_id, replicaset_id)
116+
ivshard.storage.bucket_send(bucket_id, replicaset_id)
117+
end, {bucket_id, dest_storage:replicaset_uuid()})
118+
119+
dest_storage:exec(function(bucket_id)
120+
t.helpers.retrying({timeout = 10}, function()
121+
t.assert(box.space._bucket:select(bucket_id))
122+
end)
123+
end, {bucket_id})
124+
end
125+
126+
rebalancer_recovery_group.before_all(function(g)
127+
global_cfg = vtest.config_new(cfg_template)
128+
vtest.cluster_new(g, global_cfg)
129+
g.router = vtest.router_new(g, 'router', global_cfg)
130+
vtest.cluster_bootstrap(g, global_cfg)
131+
vtest.cluster_wait_vclock_all(g)
132+
133+
local first_buckets = vtest.cluster_exec_each_master(g, function()
134+
rawset(_G, 'create_test_space', function()
135+
box.schema.create_space('test_space')
136+
box.space.test_space:format({
137+
{name = 'pk', type = 'unsigned'},
138+
{name = 'bucket_id', type = 'unsigned'},
139+
})
140+
box.space.test_space:create_index('primary', {parts = {'pk'}})
141+
box.space.test_space:create_index(
142+
'bucket_id', {parts = {'bucket_id'}, unique = false})
143+
end)
144+
return _G.get_first_bucket()
145+
end)
146+
for _, bucket_id in pairs(first_buckets) do
147+
g.router:exec(function(storage_bucket)
148+
ivshard.router.call(storage_bucket, 'write', 'create_test_space')
149+
end, {bucket_id})
150+
end
151+
g.buckets_not_active_log = 'Some buckets are not active'
152+
g.buckets_not_active_pattern = "%d+-%d+-%d+ %d+:%d+:%d+.%d+ .* " ..
153+
g.buckets_not_active_log
154+
g.rebalancer_wait_interval = 0.01
155+
end)
156+
157+
rebalancer_recovery_group.after_all(function(g)
158+
g.cluster:drop()
159+
end)
160+
161+
--
162+
-- Improve logging of rebalancer and recovery (gh-212).
163+
--
164+
rebalancer_recovery_group.test_no_logs_while_unsuccess_recovery = function(g)
165+
g.replica_2_a:exec(function()
166+
ivshard.storage.internal.errinj.ERRINJ_RECEIVE_PARTIALLY = true
167+
rawset(_G, 'old_call', ivshard.storage._call)
168+
ivshard.storage._call = function(service_name, ...)
169+
if service_name == 'recovery_bucket_stat' then
170+
return error('TimedOut')
171+
end
172+
return _G.old_call(service_name, ...)
173+
end
174+
end)
175+
local hanged_bucket_id = get_first_storage_bucket_id(g.replica_1_a)
176+
move_bucket(g.replica_1_a, g.replica_2_a, hanged_bucket_id)
177+
t.helpers.retrying({}, function()
178+
t.assert(g.replica_1_a:grep_log('Error during recovery of bucket 1'))
179+
end)
180+
t.assert_not(g.replica_1_a:grep_log('Finish bucket recovery step, 0'))
181+
g.replica_2_a:exec(function()
182+
ivshard.storage.internal.errinj.ERRINJ_RECEIVE_PARTIALLY = false
183+
ivshard.storage._call = _G.old_call
184+
ivshard.storage.recovery_wakeup()
185+
end)
186+
g.replica_1_a:exec(function(bucket_id)
187+
t.helpers.retrying({}, function()
188+
t.assert_equals(box.space._bucket:select(bucket_id)[1].status,
189+
'active')
190+
end)
191+
end, {hanged_bucket_id})
192+
end

vshard/storage/init.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1003,7 +1003,7 @@ local function recovery_step_by_type(type)
10031003
is_step_empty = false
10041004
::continue::
10051005
end
1006-
if not is_step_empty then
1006+
if not is_step_empty and recovered > 0 then
10071007
log.info('Finish bucket recovery step, %d %s buckets are recovered '..
10081008
'among %d', recovered, type, total)
10091009
end

0 commit comments

Comments
 (0)