Skip to content

Commit f7c7540

Browse files
Kemeng Shi authored and gregkh committed
mm: swap: correctly use maxpages in swapon syscall to avoid potential deadloop
commit 255116c upstream. We use maxpages from read_swap_header() to initialize swap_info_struct; however, maxpages might be reduced in setup_swap_extents(), and si->max is assigned the reduced maxpages from setup_swap_extents(). Obviously, this could lead to memory waste, as we allocated memory based on the larger maxpages; besides, this could lead to a potential dead loop as follows: 1) When calling setup_clusters() with the larger maxpages, unavailable pages within the range [si->max, larger maxpages) are not accounted for with inc_cluster_info_page(). As a result, these pages are assumed available but cannot be allocated. The cluster containing these pages can be moved to the frag_clusters list after all of its available pages have been allocated. 2) When the cluster mentioned in 1) is the only cluster in the frag_clusters list, cluster_alloc_swap_entry() assumes an order 0 allocation will never fail and will enter a dead loop by repeatedly trying to allocate a page from the only cluster in frag_clusters, which contains no actually available pages. Call setup_swap_extents() to get the final maxpages before swap_info_struct initialization to fix the issue. After this change, span will include badblocks and will become a large value, which I think is the correct value: In summary, there are two kinds of swapfile_activate operations. 1. Filesystem style: Treat all blocks as logically contiguous and find usable physical extents in the logical range. In this way, si->pages will be the actual usable physical blocks and span will be "1 + highest_block - lowest_block". 2. Block device style: Treat all blocks as physically contiguous and add only one single extent. In this way, si->pages will be si->max and span will be "si->pages - 1". Actually, si->pages and si->max are only used in block device style, and the span value is set with si->pages. As a result, the span value in block device style will become a larger value, as you mentioned. I think the larger value is correct based on: 1. 
The span value in filesystem style is "1 + highest_block - lowest_block", which is the range covering all possible physical blocks including the badblocks. 2. For block device style, si->pages is the actual usable block number and is already in pr_info. The original span value before this patch also refers to the usable block number, which is redundant in pr_info. [shikemeng@huaweicloud.com: ensure si->pages == si->max - 1 after setup_swap_extents()] Link: https://lkml.kernel.org/r/20250522122554.12209-3-shikemeng@huaweicloud.com Link: https://lkml.kernel.org/r/20250718065139.61989-1-shikemeng@huaweicloud.com Link: https://lkml.kernel.org/r/20250522122554.12209-3-shikemeng@huaweicloud.com Fixes: 661383c ("mm: swap: relaim the cached parts that got scanned") Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com> Reviewed-by: Baoquan He <bhe@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kairui Song <kasong@tencent.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent b85fe4c commit f7c7540

File tree

1 file changed

+26
-27
lines changed

1 file changed

+26
-27
lines changed

mm/swapfile.c

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3237,43 +3237,30 @@ static unsigned long read_swap_header(struct swap_info_struct *si,
32373237
#define SWAP_CLUSTER_COLS \
32383238
max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
32393239

3240-
static int setup_swap_map_and_extents(struct swap_info_struct *si,
3241-
union swap_header *swap_header,
3242-
unsigned char *swap_map,
3243-
unsigned long maxpages,
3244-
sector_t *span)
3240+
static int setup_swap_map(struct swap_info_struct *si,
3241+
union swap_header *swap_header,
3242+
unsigned char *swap_map,
3243+
unsigned long maxpages)
32453244
{
3246-
unsigned int nr_good_pages;
32473245
unsigned long i;
3248-
int nr_extents;
3249-
3250-
nr_good_pages = maxpages - 1; /* omit header page */
32513246

3247+
swap_map[0] = SWAP_MAP_BAD; /* omit header page */
32523248
for (i = 0; i < swap_header->info.nr_badpages; i++) {
32533249
unsigned int page_nr = swap_header->info.badpages[i];
32543250
if (page_nr == 0 || page_nr > swap_header->info.last_page)
32553251
return -EINVAL;
32563252
if (page_nr < maxpages) {
32573253
swap_map[page_nr] = SWAP_MAP_BAD;
3258-
nr_good_pages--;
3254+
si->pages--;
32593255
}
32603256
}
32613257

3262-
if (nr_good_pages) {
3263-
swap_map[0] = SWAP_MAP_BAD;
3264-
si->max = maxpages;
3265-
si->pages = nr_good_pages;
3266-
nr_extents = setup_swap_extents(si, span);
3267-
if (nr_extents < 0)
3268-
return nr_extents;
3269-
nr_good_pages = si->pages;
3270-
}
3271-
if (!nr_good_pages) {
3258+
if (!si->pages) {
32723259
pr_warn("Empty swap-file\n");
32733260
return -EINVAL;
32743261
}
32753262

3276-
return nr_extents;
3263+
return 0;
32773264
}
32783265

32793266
static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
@@ -3318,7 +3305,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
33183305
* Mark unusable pages as unavailable. The clusters aren't
33193306
* marked free yet, so no list operations are involved yet.
33203307
*
3321-
* See setup_swap_map_and_extents(): header page, bad pages,
3308+
* See setup_swap_map(): header page, bad pages,
33223309
* and the EOF part of the last cluster.
33233310
*/
33243311
inc_cluster_info_page(si, cluster_info, 0);
@@ -3456,6 +3443,21 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
34563443
goto bad_swap_unlock_inode;
34573444
}
34583445

3446+
si->max = maxpages;
3447+
si->pages = maxpages - 1;
3448+
nr_extents = setup_swap_extents(si, &span);
3449+
if (nr_extents < 0) {
3450+
error = nr_extents;
3451+
goto bad_swap_unlock_inode;
3452+
}
3453+
if (si->pages != si->max - 1) {
3454+
pr_err("swap:%u != (max:%u - 1)\n", si->pages, si->max);
3455+
error = -EINVAL;
3456+
goto bad_swap_unlock_inode;
3457+
}
3458+
3459+
maxpages = si->max;
3460+
34593461
/* OK, set up the swap map and apply the bad block list */
34603462
swap_map = vzalloc(maxpages);
34613463
if (!swap_map) {
@@ -3467,12 +3469,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
34673469
if (error)
34683470
goto bad_swap_unlock_inode;
34693471

3470-
nr_extents = setup_swap_map_and_extents(si, swap_header, swap_map,
3471-
maxpages, &span);
3472-
if (unlikely(nr_extents < 0)) {
3473-
error = nr_extents;
3472+
error = setup_swap_map(si, swap_header, swap_map, maxpages);
3473+
if (error)
34743474
goto bad_swap_unlock_inode;
3475-
}
34763475

34773476
/*
34783477
* Use kvmalloc_array instead of bitmap_zalloc as the allocation order might

0 commit comments

Comments
 (0)