Skip to content

Commit f3851d6

Browse files
author
Herton R. Krzesinski
committed
Merge: mm/hugetlb: address race condition in hugetlb_no_page()
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/1875 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2158123 CVE: CVE-2022-3522 hugetlb_no_page() is called without holding the page table lock, as it takes the spinlock later down the execution path, when it really is installing the PTE in the page tables. That, however, opens up a window for data races when evaluating the PTE value for USERFAULTFD fault handling. Rafael Aquini (4): mm/hugetlb: handle pte markers in page faults mm/hugetlb: fix race condition of uffd missing/minor handling mm/hugetlb: use hugetlb_pte_stable in migration race check mm/selftest: uffd: explain the write missing fault check mm/hugetlb.c | 69 +++++++++++++++++++++--- tools/testing/selftests/vm/userfaultfd.c | 22 +++++++- 2 files changed, 82 insertions(+), 9 deletions(-) Signed-off-by: Rafael Aquini <aquini@redhat.com> Approved-by: Peter Xu <peterx@redhat.com> Approved-by: Waiman Long <longman@redhat.com> Approved-by: Aristeu Rozanski <arozansk@redhat.com> Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
2 parents d9bdbb1 + e090737 commit f3851d6

File tree

2 files changed

+82
-9
lines changed

2 files changed

+82
-9
lines changed

mm/hugetlb.c

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5481,10 +5481,28 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
54815481
return ret;
54825482
}
54835483

5484+
/*
5485+
* Recheck pte with pgtable lock. Returns true if pte didn't change, or
5486+
* false if pte changed or is changing.
5487+
*/
5488+
static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm,
5489+
pte_t *ptep, pte_t old_pte)
5490+
{
5491+
spinlock_t *ptl;
5492+
bool same;
5493+
5494+
ptl = huge_pte_lock(h, mm, ptep);
5495+
same = pte_same(huge_ptep_get(ptep), old_pte);
5496+
spin_unlock(ptl);
5497+
5498+
return same;
5499+
}
5500+
54845501
static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
54855502
struct vm_area_struct *vma,
54865503
struct address_space *mapping, pgoff_t idx,
5487-
unsigned long address, pte_t *ptep, unsigned int flags)
5504+
unsigned long address, pte_t *ptep,
5505+
pte_t old_pte, unsigned int flags)
54885506
{
54895507
struct hstate *h = hstate_vma(vma);
54905508
vm_fault_t ret = VM_FAULT_SIGBUS;
@@ -5523,6 +5541,28 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
55235541
if (!page) {
55245542
/* Check for page in userfault range */
55255543
if (userfaultfd_missing(vma)) {
5544+
/*
5545+
* Since hugetlb_no_page() was examining pte
5546+
* without pgtable lock, we need to re-test under
5547+
* lock because the pte may not be stable and could
5548+
* have changed from under us. Try to detect
5549+
* either changed or during-changing ptes and retry
5550+
* properly when needed.
5551+
*
5552+
* Note that userfaultfd is actually fine with
5553+
* false positives (e.g. caused by pte changed),
5554+
* but not wrong logical events (e.g. caused by
5555+
* reading a pte during changing). The latter can
5556+
* confuse the userspace, so the strictness is very
5557+
* much preferred. E.g., MISSING event should
5558+
* never happen on the page after UFFDIO_COPY has
5559+
* correctly installed the page and returned.
5560+
*/
5561+
if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
5562+
ret = 0;
5563+
goto out;
5564+
}
5565+
55265566
ret = hugetlb_handle_userfault(vma, mapping, idx,
55275567
flags, haddr, address,
55285568
VM_UFFD_MISSING);
@@ -5543,11 +5583,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
55435583
* here. Before returning error, get ptl and make
55445584
* sure there really is no pte entry.
55455585
*/
5546-
ptl = huge_pte_lock(h, mm, ptep);
5547-
ret = 0;
5548-
if (huge_pte_none(huge_ptep_get(ptep)))
5586+
if (hugetlb_pte_stable(h, mm, ptep, old_pte))
55495587
ret = vmf_error(PTR_ERR(page));
5550-
spin_unlock(ptl);
5588+
else
5589+
ret = 0;
55515590
goto out;
55525591
}
55535592
clear_huge_page(page, address, pages_per_huge_page(h));
@@ -5587,6 +5626,11 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
55875626
if (userfaultfd_minor(vma)) {
55885627
unlock_page(page);
55895628
put_page(page);
5629+
/* See comment in userfaultfd_missing() block above */
5630+
if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
5631+
ret = 0;
5632+
goto out;
5633+
}
55905634
ret = hugetlb_handle_userfault(vma, mapping, idx,
55915635
flags, haddr, address,
55925636
VM_UFFD_MINOR);
@@ -5611,7 +5655,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
56115655

56125656
ptl = huge_pte_lock(h, mm, ptep);
56135657
ret = 0;
5614-
if (!huge_pte_none(huge_ptep_get(ptep)))
5658+
/* If pte changed from under us, retry */
5659+
if (!pte_same(huge_ptep_get(ptep), old_pte))
56155660
goto backout;
56165661

56175662
if (anon_rmap) {
@@ -5621,6 +5666,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
56215666
page_dup_file_rmap(page, true);
56225667
new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
56235668
&& (vma->vm_flags & VM_SHARED)));
5669+
/*
5670+
* If this pte was previously wr-protected, keep it wr-protected even
5671+
* if populated.
5672+
*/
5673+
if (unlikely(pte_marker_uffd_wp(old_pte)))
5674+
new_pte = huge_pte_wrprotect(huge_pte_mkuffd_wp(new_pte));
56245675
set_huge_pte_at(mm, haddr, ptep, new_pte);
56255676

56265677
hugetlb_count_add(pages_per_huge_page(h), mm);
@@ -5738,8 +5789,10 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
57385789
mutex_lock(&hugetlb_fault_mutex_table[hash]);
57395790

57405791
entry = huge_ptep_get(ptep);
5741-
if (huge_pte_none(entry)) {
5742-
ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep, flags);
5792+
/* PTE markers should be handled the same way as none pte */
5793+
if (huge_pte_none_mostly(entry)) {
5794+
ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
5795+
entry, flags);
57435796
goto out_mutex;
57445797
}
57455798

tools/testing/selftests/vm/userfaultfd.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,27 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
703703
continue_range(uffd, msg->arg.pagefault.address, page_size);
704704
stats->minor_faults++;
705705
} else {
706-
/* Missing page faults */
706+
/*
707+
* Missing page faults.
708+
*
709+
* Here we force a write check for each of the missing mode
710+
* faults. It's guaranteed because the only threads that
711+
* will trigger uffd faults are the locking threads, and
712+
* their first instruction to touch the missing page will
713+
* always be pthread_mutex_lock().
714+
*
715+
* Note that here we relied on an NPTL glibc impl detail to
716+
* always read the lock type at the entry of the lock op
717+
* (pthread_mutex_t.__data.__type, offset 0x10) before
718+
* doing any locking operations to guarantee that. It's
719+
* actually not good to rely on this impl detail because
720+
* logically a pthread-compatible lib can implement the
721+
* locks without types and we can fail when linking with
722+
* them. However since we used to find bugs with this
723+
* strict check we still keep it around. Hopefully this
724+
* could be a good hint when it fails again. If one day
725+
* it'll break on some other impl of glibc we'll revisit.
726+
*/
707727
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
708728
err("unexpected write fault");
709729

0 commit comments

Comments
 (0)