Skip to content

Commit aed877c

Browse files
apopple-nvidia and akpm00
authored and committed
device/dax: properly refcount device dax pages when mapping
Device DAX pages are currently not reference counted when mapped, instead relying on the devmap PTE bit to ensure mapping code will not get/put references. This requires special handling in various page table walkers, particularly GUP, to manage references on the underlying pgmap to ensure the pages remain valid. However there is no reason these pages can't be refcounted properly at map time. Doing so eliminates the need for the devmap PTE bit, freeing up a precious PTE bit. It also simplifies GUP as it no longer needs to manage the special pgmap references and can instead just treat the pages normally as defined by vm_normal_page(). Link: https://lkml.kernel.org/r/968d3a8e9157e7492e85d065765c027e525f9fc9.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple <apopple@nvidia.com> Tested-by: Alison Schofield <alison.schofield@intel.com> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Asahi Lina <lina@asahilina.net> Cc: Balbir Singh <balbirs@nvidia.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@linux.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Chunyan Zhang <zhang.lyra@gmail.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: "Darrick J. 
Wong" <djwong@kernel.org> Cc: Dave Chinner <david@fromorbit.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dave Jiang <dave.jiang@intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jan Kara <jack@suse.cz> Cc: Jason Gunthorpe <jgg@nvidia.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: John Hubbard <jhubbard@nvidia.com> Cc: linmiaohe <linmiaohe@huawei.com> Cc: Logan Gunthorpe <logang@deltatee.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Michael "Camp Drill Sergeant" Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Peter Xu <peterx@redhat.com> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Ted Ts'o <tytso@mit.edu> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vishal Verma <vishal.l.verma@intel.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: WANG Xuerui <kernel@xen0n.name> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 38607c6 commit aed877c

File tree

2 files changed

+16
-13
lines changed

2 files changed

+16
-13
lines changed

drivers/dax/device.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,12 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
125125
return VM_FAULT_SIGBUS;
126126
}
127127

128-
pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
128+
pfn = phys_to_pfn_t(phys, 0);
129129

130130
dax_set_mapping(vmf, pfn, fault_size);
131131

132-
return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
132+
return vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn),
133+
vmf->flags & FAULT_FLAG_WRITE);
133134
}
134135

135136
static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
@@ -168,11 +169,12 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
168169
return VM_FAULT_SIGBUS;
169170
}
170171

171-
pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
172+
pfn = phys_to_pfn_t(phys, 0);
172173

173174
dax_set_mapping(vmf, pfn, fault_size);
174175

175-
return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
176+
return vmf_insert_folio_pmd(vmf, page_folio(pfn_t_to_page(pfn)),
177+
vmf->flags & FAULT_FLAG_WRITE);
176178
}
177179

178180
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
@@ -213,11 +215,12 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
213215
return VM_FAULT_SIGBUS;
214216
}
215217

216-
pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
218+
pfn = phys_to_pfn_t(phys, 0);
217219

218220
dax_set_mapping(vmf, pfn, fault_size);
219221

220-
return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
222+
return vmf_insert_folio_pud(vmf, page_folio(pfn_t_to_page(pfn)),
223+
vmf->flags & FAULT_FLAG_WRITE);
221224
}
222225
#else
223226
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,

mm/memremap.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -460,11 +460,7 @@ void free_zone_device_folio(struct folio *folio)
460460
{
461461
struct dev_pagemap *pgmap = folio->pgmap;
462462

463-
if (WARN_ON_ONCE(!pgmap->ops))
464-
return;
465-
466-
if (WARN_ON_ONCE(pgmap->type != MEMORY_DEVICE_FS_DAX &&
467-
!pgmap->ops->page_free))
463+
if (WARN_ON_ONCE(!pgmap))
468464
return;
469465

470466
mem_cgroup_uncharge(folio);
@@ -494,12 +490,15 @@ void free_zone_device_folio(struct folio *folio)
494490
* zero, which indicates the page has been removed from the file
495491
* system mapping.
496492
*/
497-
if (pgmap->type != MEMORY_DEVICE_FS_DAX)
493+
if (pgmap->type != MEMORY_DEVICE_FS_DAX &&
494+
pgmap->type != MEMORY_DEVICE_GENERIC)
498495
folio->mapping = NULL;
499496

500497
switch (pgmap->type) {
501498
case MEMORY_DEVICE_PRIVATE:
502499
case MEMORY_DEVICE_COHERENT:
500+
if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->page_free))
501+
break;
503502
pgmap->ops->page_free(folio_page(folio, 0));
504503
put_dev_pagemap(pgmap);
505504
break;
@@ -509,7 +508,6 @@ void free_zone_device_folio(struct folio *folio)
509508
* Reset the refcount to 1 to prepare for handing out the page
510509
* again.
511510
*/
512-
pgmap->ops->page_free(folio_page(folio, 0));
513511
folio_set_count(folio, 1);
514512
break;
515513

@@ -518,6 +516,8 @@ void free_zone_device_folio(struct folio *folio)
518516
break;
519517

520518
case MEMORY_DEVICE_PCI_P2PDMA:
519+
if (WARN_ON_ONCE(!pgmap->ops || !pgmap->ops->page_free))
520+
break;
521521
pgmap->ops->page_free(folio_page(folio, 0));
522522
break;
523523
}

0 commit comments

Comments
 (0)