Skip to content

Commit 28bdff6

Browse files
committed
vfio/pci: implement huge_fault support
JIRA: https://issues.redhat.com/browse/RHEL-73613

commit f9e54c3
Author: Alex Williamson <alex.williamson@redhat.com>
Date: Mon Aug 26 16:43:53 2024 -0400

    vfio/pci: implement huge_fault support

    With the addition of pfnmap support in vmf_insert_pfn_{pmd,pud}() we can
    take advantage of PMD and PUD faults to PCI BAR mmaps and create more
    efficient mappings. PCI BARs are always a power of two and will typically
    get at least PMD alignment without userspace even trying. Userspace
    alignment for PUD mappings is also not too difficult.

    Consolidate faults through a single handler with a new wrapper for
    standard single page faults. The pre-faulting behavior of commit
    d71a989 ("vfio/pci: Insert full vma on mmap'd MMIO fault") is removed in
    this refactoring since huge_fault will cover the bulk of the faults and
    results in more efficient page table usage. We also want to avoid that
    pre-faulted single page mappings preempt huge page mappings.

    Link: https://lkml.kernel.org/r/20240826204353.2228736-20-peterx@redhat.com
    Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
    Signed-off-by: Peter Xu <peterx@redhat.com>
    Cc: Alexander Gordeev <agordeev@linux.ibm.com>
    Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
    Cc: Borislav Petkov <bp@alien8.de>
    Cc: Catalin Marinas <catalin.marinas@arm.com>
    Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
    Cc: Dave Hansen <dave.hansen@linux.intel.com>
    Cc: David Hildenbrand <david@redhat.com>
    Cc: Gavin Shan <gshan@redhat.com>
    Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
    Cc: Heiko Carstens <hca@linux.ibm.com>
    Cc: Ingo Molnar <mingo@redhat.com>
    Cc: Jason Gunthorpe <jgg@nvidia.com>
    Cc: Matthew Wilcox <willy@infradead.org>
    Cc: Niklas Schnelle <schnelle@linux.ibm.com>
    Cc: Paolo Bonzini <pbonzini@redhat.com>
    Cc: Ryan Roberts <ryan.roberts@arm.com>
    Cc: Sean Christopherson <seanjc@google.com>
    Cc: Sven Schnelle <svens@linux.ibm.com>
    Cc: Thomas Gleixner <tglx@linutronix.de>
    Cc: Vasily Gorbik <gor@linux.ibm.com>
    Cc: Will Deacon <will@kernel.org>
    Cc: Zi Yan <ziy@nvidia.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Signed-off-by: Donald Dutile <ddutile@redhat.com>
1 parent b70e553 commit 28bdff6

File tree

1 file changed

+43
-17
lines changed

1 file changed

+43
-17
lines changed

drivers/vfio/pci/vfio_pci_core.c

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <linux/mutex.h>
2121
#include <linux/notifier.h>
2222
#include <linux/pci.h>
23+
#include <linux/pfn_t.h>
2324
#include <linux/pm_runtime.h>
2425
#include <linux/slab.h>
2526
#include <linux/types.h>
@@ -1652,45 +1653,70 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
16521653
return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
16531654
}
16541655

1655-
static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
1656+
static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
1657+
unsigned int order)
16561658
{
16571659
struct vm_area_struct *vma = vmf->vma;
16581660
struct vfio_pci_core_device *vdev = vma->vm_private_data;
16591661
unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff;
1660-
unsigned long addr = vma->vm_start;
16611662
vm_fault_t ret = VM_FAULT_SIGBUS;
16621663

1664+
if (order && (vmf->address & ((PAGE_SIZE << order) - 1) ||
1665+
vmf->address + (PAGE_SIZE << order) > vma->vm_end)) {
1666+
ret = VM_FAULT_FALLBACK;
1667+
goto out;
1668+
}
1669+
16631670
pfn = vma_to_pfn(vma);
16641671

16651672
down_read(&vdev->memory_lock);
16661673

16671674
if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
16681675
goto out_unlock;
16691676

1670-
ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
1671-
if (ret & VM_FAULT_ERROR)
1672-
goto out_unlock;
1673-
1674-
/*
1675-
* Pre-fault the remainder of the vma, abort further insertions and
1676-
* supress error if fault is encountered during pre-fault.
1677-
*/
1678-
for (; addr < vma->vm_end; addr += PAGE_SIZE, pfn++) {
1679-
if (addr == vmf->address)
1680-
continue;
1681-
1682-
if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR)
1683-
break;
1677+
switch (order) {
1678+
case 0:
1679+
ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
1680+
break;
1681+
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
1682+
case PMD_ORDER:
1683+
ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn + pgoff,
1684+
PFN_DEV), false);
1685+
break;
1686+
#endif
1687+
#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
1688+
case PUD_ORDER:
1689+
ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn + pgoff,
1690+
PFN_DEV), false);
1691+
break;
1692+
#endif
1693+
default:
1694+
ret = VM_FAULT_FALLBACK;
16841695
}
16851696

16861697
out_unlock:
16871698
up_read(&vdev->memory_lock);
1699+
out:
1700+
dev_dbg_ratelimited(&vdev->pdev->dev,
1701+
"%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
1702+
__func__, order,
1703+
vma->vm_pgoff >>
1704+
(VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT),
1705+
pgoff, (unsigned int)ret);
16881706

16891707
return ret;
16901708
}
16911709

1710+
static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf)
1711+
{
1712+
return vfio_pci_mmap_huge_fault(vmf, 0);
1713+
}
1714+
16921715
static const struct vm_operations_struct vfio_pci_mmap_ops = {
1693-
.fault = vfio_pci_mmap_fault,
1716+
.fault = vfio_pci_mmap_page_fault,
1717+
#ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP
1718+
.huge_fault = vfio_pci_mmap_huge_fault,
1719+
#endif
16941720
};
16951721

16961722
int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)

0 commit comments

Comments
 (0)