Commit 9ec2304

Merge: MM: Proactive fixes for RHEL10.1

MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/827
JIRA: https://issues.redhat.com/browse/RHEL-77742

Proactively backport available fixes for the MM SST and RHEL10.1. This is part of an ongoing effort to keep the RHEL MM SST as stable as possible by backporting upstream fixes for each release.

CVE: CVE-2025-22090
CVE: CVE-2025-22000
CVE: CVE-2025-22015
CVE: CVE-2025-21908
CVE: CVE-2025-21932
CVE: CVE-2025-21931
CVE: CVE-2025-21907
CVE: CVE-2025-21815

Omitted-fix: 517f496
Omitted-fix: 41e6ddc
Omitted-fix: 7ddeb91
Omitted-fix: 30f62b9
Omitted-fix: 219bf6e
Omitted-fix: 0cf4b16

Signed-off-by: Nico Pache <npache@redhat.com>
Approved-by: Rafael Aquini <raquini@redhat.com>
Approved-by: Luiz Capitulino <luizcap@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Jan Stancek <jstancek@redhat.com>

2 parents ee2acc2 + b8321b6, commit 9ec2304

31 files changed: +384 / -221 lines

arch/x86/mm/pat/memtype.c (28 additions, 24 deletions)

@@ -984,29 +984,42 @@ static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
 		return -EINVAL;
 	}
 
-/*
- * track_pfn_copy is called when vma that is covering the pfnmap gets
- * copied through copy_page_range().
- *
- * If the vma has a linear pfn mapping for the entire range, we get the prot
- * from pte and reserve the entire vma range with single reserve_pfn_range call.
- */
-int track_pfn_copy(struct vm_area_struct *vma)
+int track_pfn_copy(struct vm_area_struct *dst_vma,
+		struct vm_area_struct *src_vma, unsigned long *pfn)
 {
+	const unsigned long vma_size = src_vma->vm_end - src_vma->vm_start;
 	resource_size_t paddr;
-	unsigned long vma_size = vma->vm_end - vma->vm_start;
 	pgprot_t pgprot;
+	int rc;
 
-	if (vma->vm_flags & VM_PAT) {
-		if (get_pat_info(vma, &paddr, &pgprot))
-			return -EINVAL;
-		/* reserve the whole chunk covered by vma. */
-		return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
-	}
+	if (!(src_vma->vm_flags & VM_PAT))
+		return 0;
+
+	/*
+	 * Duplicate the PAT information for the dst VMA based on the src
+	 * VMA.
+	 */
+	if (get_pat_info(src_vma, &paddr, &pgprot))
+		return -EINVAL;
+	rc = reserve_pfn_range(paddr, vma_size, &pgprot, 1);
+	if (rc)
+		return rc;
 
+	/* Reservation for the destination VMA succeeded. */
+	vm_flags_set(dst_vma, VM_PAT);
+	*pfn = PHYS_PFN(paddr);
 	return 0;
 }
 
+void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn)
+{
+	untrack_pfn(dst_vma, pfn, dst_vma->vm_end - dst_vma->vm_start, true);
+	/*
+	 * Reservation was freed, any copied page tables will get cleaned
+	 * up later, but without getting PAT involved again.
+	 */
+}
+
 /*
  * prot is passed in as a parameter for the new mapping. If the vma has
  * a linear pfn mapping for the entire range, or no vma is provided,
@@ -1095,15 +1108,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 	}
 }
 
-/*
- * untrack_pfn_clear is called if the following situation fits:
- *
- * 1) while mremapping a pfnmap for a new region, with the old vma after
- * its pfnmap page table has been removed. The new vma has a new pfnmap
- * to the same pfn & cache type with VM_PAT set.
- * 2) while duplicating vm area, the new vma fails to copy the pgtable from
- * old vma.
- */
 void untrack_pfn_clear(struct vm_area_struct *vma)
 {
 	vm_flags_clear(vma, VM_PAT);
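
For orientation, the new pair is consumed from the fork path. A minimal sketch of the caller side, assuming a copy_page_range()-style flow; the mm/memory.c changes are not part of this excerpt, so treat the wrapper name and the exact surrounding structure as assumptions:

/* Sketch only: how fork's page-table copy is expected to drive the API. */
static int copy_page_range_sketch(struct vm_area_struct *dst_vma,
				  struct vm_area_struct *src_vma)
{
	unsigned long pfn = 0;	/* callers should initialize the pfn to 0 */
	int ret;

	if (unlikely(src_vma->vm_flags & VM_PFNMAP)) {
		ret = track_pfn_copy(dst_vma, src_vma, &pfn);
		if (ret)
			return ret;
	}

	ret = 0;	/* ... copy the page tables; may fail, e.g. -ENOMEM ... */

	/* On failure, undo the PAT reservation taken for the dst VMA. */
	if (ret && unlikely(src_vma->vm_flags & VM_PFNMAP))
		untrack_pfn_copy(dst_vma, pfn);
	return ret;
}

Note that track_pfn_copy() only sets VM_PAT on the destination VMA after its own reservation succeeded, so untrack_pfn_copy() can always be called on the failure path.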

fs/nfs/file.c (2 additions, 1 deletion)

@@ -29,6 +29,7 @@
 #include <linux/pagemap.h>
 #include <linux/gfp.h>
 #include <linux/swap.h>
+#include <linux/compaction.h>
 
 #include <linux/uaccess.h>
 #include <linux/filelock.h>
@@ -457,7 +458,7 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
 	/* If the private flag is set, then the folio is not freeable */
 	if (folio_test_private(folio)) {
 		if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
-		    current_is_kswapd())
+		    current_is_kswapd() || current_is_kcompactd())
 			return false;
 		if (nfs_wb_folio(folio->mapping->host, folio) < 0)
 			return false;
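
This mirrors the existing kswapd guard: kicking off NFS writeback from the compaction daemon risks the same reclaim-context stall, which is what the new PF_KCOMPACTD flag and the current_is_kcompactd() helper below exist to detect.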

include/linux/compaction.h (5 additions, 0 deletions)

@@ -80,6 +80,11 @@ static inline unsigned long compact_gap(unsigned int order)
 	return 2UL << order;
 }
 
+static inline int current_is_kcompactd(void)
+{
+	return current->flags & PF_KCOMPACTD;
+}
+
 #ifdef CONFIG_COMPACTION
 
 extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order);

include/linux/mmzone.h (3 additions, 0 deletions)

@@ -970,6 +970,9 @@ struct zone {
 #ifdef CONFIG_UNACCEPTED_MEMORY
 	/* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */
 	struct list_head unaccepted_pages;
+
+	/* To be called once the last page in the zone is accepted */
+	struct work_struct unaccepted_cleanup;
 #endif
 
 	/* zone flags, see below */
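
The new field follows the usual deferred-work pattern: the final cleanup runs from a workqueue rather than inside the page-acceptance path itself. A minimal sketch of that pattern, where the handler name and body are assumptions (only the struct zone field above comes from this diff):

#include <linux/workqueue.h>

/* Hypothetical handler; recovers the zone from the embedded work item. */
static void unaccepted_cleanup_work(struct work_struct *work)
{
	struct zone *zone = container_of(work, struct zone, unaccepted_cleanup);

	/* ... one-time cleanup once the last page in this zone is accepted ... */
}

/* At zone init:         INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work); */
/* On the last accepted page: schedule_work(&zone->unaccepted_cleanup); */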

include/linux/pgtable.h (25 additions, 6 deletions)

@@ -1539,14 +1539,28 @@ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 }
 
 /*
- * track_pfn_copy is called when vma that is covering the pfnmap gets
- * copied through copy_page_range().
+ * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
+ * tables copied during copy_page_range(). Will store the pfn to be
+ * passed to untrack_pfn_copy() only if there is something to be untracked.
+ * Callers should initialize the pfn to 0.
  */
-static inline int track_pfn_copy(struct vm_area_struct *vma)
+static inline int track_pfn_copy(struct vm_area_struct *dst_vma,
+		struct vm_area_struct *src_vma, unsigned long *pfn)
 {
 	return 0;
 }
 
+/*
+ * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during
+ * copy_page_range(), but after track_pfn_copy() was already called. Can
+ * be called even if track_pfn_copy() did not actually track anything:
+ * handled internally.
+ */
+static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma,
+		unsigned long pfn)
+{
+}
+
 /*
  * untrack_pfn is called while unmapping a pfnmap for a region.
  * untrack can be called for a specific region indicated by pfn and size or
@@ -1559,8 +1573,10 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
 }
 
 /*
- * untrack_pfn_clear is called while mremapping a pfnmap for a new region
- * or fails to copy pgtable during duplicate vm area.
+ * untrack_pfn_clear is called in the following cases on a VM_PFNMAP VMA:
+ *
+ * 1) During mremap() on the src VMA after the page tables were moved.
+ * 2) During fork() on the dst VMA, immediately after duplicating the src VMA.
  */
 static inline void untrack_pfn_clear(struct vm_area_struct *vma)
 {
@@ -1571,7 +1587,10 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 			   unsigned long size);
 extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 			     pfn_t pfn);
-extern int track_pfn_copy(struct vm_area_struct *vma);
+extern int track_pfn_copy(struct vm_area_struct *dst_vma,
+		struct vm_area_struct *src_vma, unsigned long *pfn);
+extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
+		unsigned long pfn);
 extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 			unsigned long size, bool mm_wr_locked);
 extern void untrack_pfn_clear(struct vm_area_struct *vma);

include/linux/sched.h (1 addition, 1 deletion)

@@ -1722,7 +1722,7 @@ extern struct pid *cad_pid;
 #define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
 #define PF_USER_WORKER		0x00004000	/* Kernel thread cloned from userspace thread */
 #define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
-#define PF__HOLE__00010000	0x00010000
+#define PF_KCOMPACTD		0x00010000	/* I am kcompactd */
 #define PF_KSWAPD		0x00020000	/* I am kswapd */
 #define PF_MEMALLOC_NOFS	0x00040000	/* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */
 #define PF_MEMALLOC_NOIO	0x00080000	/* All allocations inherit GFP_NOIO. See memalloc_noio_save() */

include/linux/swap.h (1 addition, 0 deletions)

@@ -223,6 +223,7 @@ enum {
 };
 
 #define SWAP_CLUSTER_MAX 32UL
+#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
 #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
 
 /* Bit flag in swap_map */
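
SWAP_CLUSTER_MAX_SKIPPED works out to 32768 (32UL << 10) and reads as an upper bound on skipped work. A minimal sketch of how such a cap bounds an LRU isolation loop; the loop shape and every name other than SWAP_CLUSTER_MAX_SKIPPED are assumptions, not taken from this merge:

/* Hypothetical isolation loop: names other than the macro are made up. */
static unsigned long isolate_bounded(struct list_head *src,
				     struct list_head *folios_skipped,
				     unsigned long nr_to_scan)
{
	unsigned long scanned = 0, skipped = 0;

	while (scanned < nr_to_scan && !list_empty(src)) {
		struct folio *folio = lru_to_folio(src);

		if (!can_isolate(folio)) {	/* can_isolate() is made up */
			list_move(&folio->lru, folios_skipped);
			/* The cap: give up instead of livelocking on skips. */
			if (++skipped >= SWAP_CLUSTER_MAX_SKIPPED)
				break;
			continue;
		}
		scanned += folio_nr_pages(folio);
		/* ... actually isolate the folio here ... */
	}
	return scanned;
}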

kernel/fork.c (5 additions, 0 deletions)

@@ -691,6 +691,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		tmp = vm_area_dup(mpnt);
 		if (!tmp)
 			goto fail_nomem;
+
+		/* track_pfn_copy() will later take care of copying internal state. */
+		if (unlikely(tmp->vm_flags & VM_PFNMAP))
+			untrack_pfn_clear(tmp);
+
 		retval = vma_dup_policy(mpnt, tmp);
 		if (retval)
 			goto fail_nomem_policy;
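
Worth noting: vm_area_dup() duplicates the VMA, vm_flags included, so without this hunk the fresh child VMA would claim a VM_PAT reservation it never took. Clearing it up front keeps dup_mmap()'s error paths from untracking the parent's reservation, and track_pfn_copy() re-establishes the flag only once its own reservation has succeeded.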

mm/compaction.c (8 additions, 4 deletions)

@@ -630,7 +630,8 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 		if (PageCompound(page)) {
 			const unsigned int order = compound_order(page);
 
-			if (blockpfn + (1UL << order) <= end_pfn) {
+			if ((order <= MAX_PAGE_ORDER) &&
+			    (blockpfn + (1UL << order) <= end_pfn)) {
 				blockpfn += (1UL << order) - 1;
 				page += (1UL << order) - 1;
 				nr_scanned += (1UL << order) - 1;
@@ -979,13 +980,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		}
 
 		if (PageHuge(page)) {
+			const unsigned int order = compound_order(page);
 			/*
 			 * skip hugetlbfs if we are not compacting for pages
 			 * bigger than its order. THPs and other compound pages
 			 * are handled below.
 			 */
 			if (!cc->alloc_contig) {
-				const unsigned int order = compound_order(page);
 
 				if (order <= MAX_PAGE_ORDER) {
 					low_pfn += (1UL << order) - 1;
@@ -1009,8 +1010,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 				/* Do not report -EBUSY down the chain */
 				if (ret == -EBUSY)
 					ret = 0;
-				low_pfn += compound_nr(page) - 1;
-				nr_scanned += compound_nr(page) - 1;
+				low_pfn += (1UL << order) - 1;
+				nr_scanned += (1UL << order) - 1;
 				goto isolate_fail;
 			}
 
@@ -3157,6 +3158,7 @@ static int kcompactd(void *p)
 	long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
 	long timeout = default_timeout;
 
+	current->flags |= PF_KCOMPACTD;
 	set_freezable();
 
 	pgdat->kcompactd_max_order = 0;
@@ -3213,6 +3215,8 @@ static int kcompactd(void *p)
 		pgdat->proactive_compact_trigger = false;
 	}
 
+	current->flags &= ~PF_KCOMPACTD;
+
 	return 0;
 }
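A note on the first two hunks: the scanners peek at pages they hold no reference to, so compound_order() can return a transient bogus value while a page is concurrently split or freed. That reading is an inference from the guard itself rather than quoted patch context; the shape of the check is:

/* Illustrative only: why the order must be bounded before shifting. */
static bool skip_is_safe(struct page *page, unsigned long blockpfn,
			 unsigned long end_pfn)
{
	const unsigned int order = compound_order(page);	/* racy value */

	/*
	 * An order >= BITS_PER_LONG would make 1UL << order undefined
	 * behavior, and any bogus large order would leap the scanner far
	 * past the block, so only orders the buddy allocator can hand
	 * out are trusted.
	 */
	return order <= MAX_PAGE_ORDER &&
	       blockpfn + (1UL << order) <= end_pfn;
}
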
mm/damon/ops-common.c (1 addition, 1 deletion)

@@ -24,7 +24,7 @@ struct folio *damon_get_folio(unsigned long pfn)
 	struct page *page = pfn_to_online_page(pfn);
 	struct folio *folio;
 
-	if (!page || PageTail(page))
+	if (!page)
 		return NULL;
 
 	folio = page_folio(page);
