Skip to content

Commit 6f15a1c

Browse files
committed
dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata
Bugzilla: https://bugzilla.redhat.com/2162536
Upstream Status: kernel/git/torvalds/linux.git

commit 8111964
Author: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Wed Nov 30 21:31:34 2022 +0800

dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata

Following concurrent processes:

          P1(drop cache)                      P2(kworker)
drop_caches_sysctl_handler
 drop_slab
  shrink_slab
   down_read(&shrinker_rwsem) - LOCK A
   do_shrink_slab
    super_cache_scan
     prune_icache_sb
      dispose_list
       evict
        ext4_evict_inode
         ext4_clear_inode
          ext4_discard_preallocations
           ext4_mb_load_buddy_gfp
            ext4_mb_init_cache
             ext4_read_block_bitmap_nowait
              ext4_read_bh_nowait
               submit_bh
                dm_submit_bio
                                        do_worker
                                         process_deferred_bios
                                          commit
                                           metadata_operation_failed
                                            dm_pool_abort_metadata
                                             down_write(&pmd->root_lock) - LOCK B
                                             __destroy_persistent_data_objects
                                              dm_block_manager_destroy
                                               dm_bufio_client_destroy
                                                unregister_shrinker
                                                 down_write(&shrinker_rwsem)
                 thin_map                                 |
                  dm_thin_find_block                      ↓
                   down_read(&pmd->root_lock) --> ABBA deadlock

, which triggers hung task:

[ 76.974820] INFO: task kworker/u4:3:63 blocked for more than 15 seconds.
[ 76.976019] Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910
[ 76.978521] task:kworker/u4:3 state:D stack:0 pid:63 ppid:2
[ 76.978534] Workqueue: dm-thin do_worker
[ 76.978552] Call Trace:
[ 76.978564] __schedule+0x6ba/0x10f0
[ 76.978582] schedule+0x9d/0x1e0
[ 76.978588] rwsem_down_write_slowpath+0x587/0xdf0
[ 76.978600] down_write+0xec/0x110
[ 76.978607] unregister_shrinker+0x2c/0xf0
[ 76.978616] dm_bufio_client_destroy+0x116/0x3d0
[ 76.978625] dm_block_manager_destroy+0x19/0x40
[ 76.978629] __destroy_persistent_data_objects+0x5e/0x70
[ 76.978636] dm_pool_abort_metadata+0x8e/0x100
[ 76.978643] metadata_operation_failed+0x86/0x110
[ 76.978649] commit+0x6a/0x230
[ 76.978655] do_worker+0xc6e/0xd90
[ 76.978702] process_one_work+0x269/0x630
[ 76.978714] worker_thread+0x266/0x630
[ 76.978730] kthread+0x151/0x1b0
[ 76.978772] INFO: task test.sh:2646 blocked for more than 15 seconds.
[ 76.979756] Not tainted 6.1.0-rc4-00011-g8f17dd350364-dirty #910
[ 76.982111] task:test.sh state:D stack:0 pid:2646 ppid:2459
[ 76.982128] Call Trace:
[ 76.982139] __schedule+0x6ba/0x10f0
[ 76.982155] schedule+0x9d/0x1e0
[ 76.982159] rwsem_down_read_slowpath+0x4f4/0x910
[ 76.982173] down_read+0x84/0x170
[ 76.982177] dm_thin_find_block+0x4c/0xd0
[ 76.982183] thin_map+0x201/0x3d0
[ 76.982188] __map_bio+0x5b/0x350
[ 76.982195] dm_submit_bio+0x2b6/0x930
[ 76.982202] __submit_bio+0x123/0x2d0
[ 76.982209] submit_bio_noacct_nocheck+0x101/0x3e0
[ 76.982222] submit_bio_noacct+0x389/0x770
[ 76.982227] submit_bio+0x50/0xc0
[ 76.982232] submit_bh_wbc+0x15e/0x230
[ 76.982238] submit_bh+0x14/0x20
[ 76.982241] ext4_read_bh_nowait+0xc5/0x130
[ 76.982247] ext4_read_block_bitmap_nowait+0x340/0xc60
[ 76.982254] ext4_mb_init_cache+0x1ce/0xdc0
[ 76.982259] ext4_mb_load_buddy_gfp+0x987/0xfa0
[ 76.982263] ext4_discard_preallocations+0x45d/0x830
[ 76.982274] ext4_clear_inode+0x48/0xf0
[ 76.982280] ext4_evict_inode+0xcf/0xc70
[ 76.982285] evict+0x119/0x2b0
[ 76.982290] dispose_list+0x43/0xa0
[ 76.982294] prune_icache_sb+0x64/0x90
[ 76.982298] super_cache_scan+0x155/0x210
[ 76.982303] do_shrink_slab+0x19e/0x4e0
[ 76.982310] shrink_slab+0x2bd/0x450
[ 76.982317] drop_slab+0xcc/0x1a0
[ 76.982323] drop_caches_sysctl_handler+0xb7/0xe0
[ 76.982327] proc_sys_call_handler+0x1bc/0x300
[ 76.982331] proc_sys_write+0x17/0x20
[ 76.982334] vfs_write+0x3d3/0x570
[ 76.982342] ksys_write+0x73/0x160
[ 76.982347] __x64_sys_write+0x1e/0x30
[ 76.982352] do_syscall_64+0x35/0x80
[ 76.982357] entry_SYSCALL_64_after_hwframe+0x63/0xcd

Function metadata_operation_failed() is called when an operation on the dm pool metadata fails; the dm pool then destroys and recreates the metadata. As a result, the shrinker is unregistered and registered again, which can take shrinker_rwsem for writing while pmd->root_lock is held (i.e. under pmd_write_lock).
Fix it by allocating the new dm_block_manager before locking pmd->root_lock and destroying the old dm_block_manager after unlocking pmd->root_lock; only the replacement of the old dm_block_manager with the new dm_block_manager is done under pmd->root_lock. So, shrinker register/unregister can be done without holding pmd->root_lock.

A reproducer is available at [Link].

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216676
Cc: stable@vger.kernel.org #v5.2+
Fixes: e49e582 ("dm thin: add read only and fail io modes")
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
1 parent f72ac1d commit 6f15a1c

File tree

1 file changed

+43
-8
lines changed

1 file changed

+43
-8
lines changed

drivers/md/dm-thin-metadata.c

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -776,13 +776,15 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f
776776
return r;
777777
}
778778

779-
static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
779+
static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
780+
bool destroy_bm)
780781
{
781782
dm_sm_destroy(pmd->data_sm);
782783
dm_sm_destroy(pmd->metadata_sm);
783784
dm_tm_destroy(pmd->nb_tm);
784785
dm_tm_destroy(pmd->tm);
785-
dm_block_manager_destroy(pmd->bm);
786+
if (destroy_bm)
787+
dm_block_manager_destroy(pmd->bm);
786788
}
787789

788790
static int __begin_transaction(struct dm_pool_metadata *pmd)
@@ -989,7 +991,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
989991
}
990992
pmd_write_unlock(pmd);
991993
if (!pmd->fail_io)
992-
__destroy_persistent_data_objects(pmd);
994+
__destroy_persistent_data_objects(pmd, true);
993995

994996
kfree(pmd);
995997
return 0;
@@ -1860,19 +1862,52 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd)
18601862
int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
18611863
{
18621864
int r = -EINVAL;
1865+
struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
1866+
1867+
/* fail_io is double-checked with pmd->root_lock held below */
1868+
if (unlikely(pmd->fail_io))
1869+
return r;
1870+
1871+
/*
1872+
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
1873+
* pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
1874+
* shrinker associated with the block manager's bufio client vs pmd root_lock).
1875+
* - must take shrinker_rwsem without holding pmd->root_lock
1876+
*/
1877+
new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
1878+
THIN_MAX_CONCURRENT_LOCKS);
18631879

18641880
pmd_write_lock(pmd);
1865-
if (pmd->fail_io)
1881+
if (pmd->fail_io) {
1882+
pmd_write_unlock(pmd);
18661883
goto out;
1884+
}
18671885

18681886
__set_abort_with_changes_flags(pmd);
1869-
__destroy_persistent_data_objects(pmd);
1870-
r = __create_persistent_data_objects(pmd, false);
1887+
__destroy_persistent_data_objects(pmd, false);
1888+
old_bm = pmd->bm;
1889+
if (IS_ERR(new_bm)) {
1890+
DMERR("could not create block manager during abort");
1891+
pmd->bm = NULL;
1892+
r = PTR_ERR(new_bm);
1893+
goto out_unlock;
1894+
}
1895+
1896+
pmd->bm = new_bm;
1897+
r = __open_or_format_metadata(pmd, false);
1898+
if (r) {
1899+
pmd->bm = NULL;
1900+
goto out_unlock;
1901+
}
1902+
new_bm = NULL;
1903+
out_unlock:
18711904
if (r)
18721905
pmd->fail_io = true;
1873-
1874-
out:
18751906
pmd_write_unlock(pmd);
1907+
dm_block_manager_destroy(old_bm);
1908+
out:
1909+
if (new_bm && !IS_ERR(new_bm))
1910+
dm_block_manager_destroy(new_bm);
18761911

18771912
return r;
18781913
}

0 commit comments

Comments
 (0)