
Commit 6c88f72

apopple-nvidia authored and akpm00 committed
mm/huge_memory: add vmf_insert_folio_pmd()
Currently DAX folio/page reference counts are managed differently from
normal pages. To allow these to be managed the same as normal pages,
introduce vmf_insert_folio_pmd(). This will map the entire PMD-sized folio
and take references as it would for a normally mapped page.

This is distinct from the current mechanism, vmf_insert_pfn_pmd(), which
simply inserts a special devmap PMD entry into the page table without
holding a reference to the page for the mapping.

It is not currently useful to implement a more generic vmf_insert_folio()
which selects the correct behaviour based on folio_order(). This is
because PTE faults require only a subpage of the folio to be PTE mapped
rather than the entire folio. It would be possible to add this context
somewhere, but callers already need to handle PTE faults and PMD faults
separately, so a more generic function is not useful.

Link: https://lkml.kernel.org/r/7bf92a2e68225d13ea368d53bbfee327314d1c40.1740713401.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Asahi Lina <lina@asahilina.net>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: linmiaohe <linmiaohe@huawei.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent dbe5415 commit 6c88f72
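
As a quick illustration of how a PMD fault path might use the new helper (this sketch is not part of the commit; my_pmd_fault() and my_lookup_pmd_folio() are hypothetical, as is the surrounding fault-handler wiring):

/*
 * Hedged caller sketch, assuming a PMD-order folio covering the faulting
 * address has already been looked up. my_lookup_pmd_folio() is a
 * hypothetical helper, not an API introduced by this patch.
 */
static vm_fault_t my_pmd_fault(struct vm_fault *vmf)
{
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	struct folio *folio = my_lookup_pmd_folio(vmf);

	if (!folio || folio_order(folio) != PMD_ORDER)
		return VM_FAULT_FALLBACK;

	/*
	 * Unlike vmf_insert_pfn_pmd(), this maps the whole folio and takes
	 * a reference (plus rmap and file counter accounting) for the
	 * mapping.
	 */
	return vmf_insert_folio_pmd(vmf, folio, write);
}

PTE faults would still go through the existing PTE path, since only a subpage of the folio is mapped there; that is why the patch does not add a more generic vmf_insert_folio().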

File tree

2 files changed (+55, -12 lines)


include/linux/huge_mm.h

Lines changed: 2 additions & 0 deletions
@@ -39,6 +39,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
 vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
 vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio,
+		bool write);
 vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio,
 		bool write);
 

mm/huge_memory.c

Lines changed: 53 additions & 12 deletions
@@ -1375,28 +1375,28 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 	return __do_huge_pmd_anonymous_page(vmf);
 }
 
-static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+static int insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 		pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write,
 		pgtable_t pgtable)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pmd_t entry;
-	spinlock_t *ptl;
 
-	ptl = pmd_lock(mm, pmd);
+	lockdep_assert_held(pmd_lockptr(mm, pmd));
+
 	if (!pmd_none(*pmd)) {
 		if (write) {
 			if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
 				WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
-				goto out_unlock;
+				return -EEXIST;
 			}
 			entry = pmd_mkyoung(*pmd);
 			entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 			if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
 				update_mmu_cache_pmd(vma, addr, pmd);
 		}
 
-		goto out_unlock;
+		return -EEXIST;
 	}
 
 	entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
@@ -1412,16 +1412,11 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 	if (pgtable) {
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 		mm_inc_nr_ptes(mm);
-		pgtable = NULL;
 	}
 
 	set_pmd_at(mm, addr, pmd, entry);
 	update_mmu_cache_pmd(vma, addr, pmd);
-
-out_unlock:
-	spin_unlock(ptl);
-	if (pgtable)
-		pte_free(mm, pgtable);
+	return 0;
 }
 
 /**
@@ -1440,6 +1435,8 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
 	struct vm_area_struct *vma = vmf->vma;
 	pgprot_t pgprot = vma->vm_page_prot;
 	pgtable_t pgtable = NULL;
+	spinlock_t *ptl;
+	int error;
 
 	/*
 	 * If we had pmd_special, we could avoid all these restrictions,
@@ -1462,12 +1459,56 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
 	}
 
 	track_pfn_insert(vma, &pgprot, pfn);
+	ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+	error = insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write,
+			pgtable);
+	spin_unlock(ptl);
+	if (error && pgtable)
+		pte_free(vma->vm_mm, pgtable);
 
-	insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
 	return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio,
+		bool write)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	unsigned long addr = vmf->address & PMD_MASK;
+	struct mm_struct *mm = vma->vm_mm;
+	spinlock_t *ptl;
+	pgtable_t pgtable = NULL;
+	int error;
+
+	if (addr < vma->vm_start || addr >= vma->vm_end)
+		return VM_FAULT_SIGBUS;
+
+	if (WARN_ON_ONCE(folio_order(folio) != PMD_ORDER))
+		return VM_FAULT_SIGBUS;
+
+	if (arch_needs_pgtable_deposit()) {
+		pgtable = pte_alloc_one(vma->vm_mm);
+		if (!pgtable)
+			return VM_FAULT_OOM;
+	}
+
+	ptl = pmd_lock(mm, vmf->pmd);
+	if (pmd_none(*vmf->pmd)) {
+		folio_get(folio);
+		folio_add_file_rmap_pmd(folio, &folio->page, vma);
+		add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
+	}
+	error = insert_pfn_pmd(vma, addr, vmf->pmd,
+			pfn_to_pfn_t(folio_pfn(folio)), vma->vm_page_prot,
+			write, pgtable);
+	spin_unlock(ptl);
+	if (error && pgtable)
+		pte_free(mm, pgtable);
+
+	return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(vmf_insert_folio_pmd);
+
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
 {
