Skip to content

Commit 82ba975

Browse files
apopple-nvidia authored and akpm00 committed
mm: allow compound zone device pages
mm: allow compound zone device pages
Zone device pages are used to represent various types of device memory managed by device drivers. Currently compound zone device pages are not supported. This is because MEMORY_DEVICE_FS_DAX pages are the only user of higher order zone device pages and have their own page reference counting. A future change will unify FS DAX reference counting with normal page reference counting rules and remove the special FS DAX reference counting. Supporting that requires compound zone device pages. Supporting compound zone device pages requires compound_head() to distinguish between head and tail pages whilst still preserving the special struct page fields that are specific to zone device pages. A tail page is distinguished by having bit zero set in page->compound_head, with the remaining bits pointing to the head page. For zone device pages page->compound_head is shared with page->pgmap. The page->pgmap field must be common to all pages within a folio, even if the folio spans memory sections. Therefore pgmap is the same for both head and tail pages and can be moved into the folio, and we can use the standard scheme to find compound_head from a tail page. Link: https://lkml.kernel.org/r/67055d772e6102accf85161d0b57b0b3944292bf.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple <apopple@nvidia.com> Signed-off-by: Balbir Singh <balbirs@nvidia.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Acked-by: David Hildenbrand <david@redhat.com> Tested-by: Alison Schofield <alison.schofield@intel.com> Cc: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Asahi Lina <lina@asahilina.net> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@linux.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Chunyan Zhang <zhang.lyra@gmail.com> Cc: "Darrick J. 
Wong" <djwong@kernel.org> Cc: Dave Chinner <david@fromorbit.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dave Jiang <dave.jiang@intel.com> Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jan Kara <jack@suse.cz> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: John Hubbard <jhubbard@nvidia.com> Cc: linmiaohe <linmiaohe@huawei.com> Cc: Logan Gunthorpe <logang@deltatee.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Michael "Camp Drill Sergeant" Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Peter Xu <peterx@redhat.com> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Ted Ts'o <tytso@mit.edu> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vishal Verma <vishal.l.verma@intel.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: WANG Xuerui <kernel@xen0n.name> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent b7e2823 commit 82ba975

File tree

13 files changed

+56
-29
lines changed

13 files changed

+56
-29
lines changed

drivers/gpu/drm/nouveau/nouveau_dmem.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ struct nouveau_dmem {
8888

8989
static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
9090
{
91-
return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap);
91+
return container_of(page_pgmap(page), struct nouveau_dmem_chunk,
92+
pagemap);
9293
}
9394

9495
static struct nouveau_drm *page_to_drm(struct page *page)

drivers/pci/p2pdma.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ static const struct attribute_group p2pmem_group = {
202202

203203
static void p2pdma_page_free(struct page *page)
204204
{
205-
struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap);
205+
struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_pgmap(page));
206206
/* safe to dereference while a reference is held to the percpu ref */
207207
struct pci_p2pdma *p2pdma =
208208
rcu_dereference_protected(pgmap->provider->p2pdma, 1);
@@ -1025,8 +1025,8 @@ enum pci_p2pdma_map_type
10251025
pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
10261026
struct scatterlist *sg)
10271027
{
1028-
if (state->pgmap != sg_page(sg)->pgmap) {
1029-
state->pgmap = sg_page(sg)->pgmap;
1028+
if (state->pgmap != page_pgmap(sg_page(sg))) {
1029+
state->pgmap = page_pgmap(sg_page(sg));
10301030
state->map = pci_p2pdma_map_type(state->pgmap, dev);
10311031
state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
10321032
}

include/linux/memremap.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ static inline bool is_device_private_page(const struct page *page)
161161
{
162162
return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
163163
is_zone_device_page(page) &&
164-
page->pgmap->type == MEMORY_DEVICE_PRIVATE;
164+
page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE;
165165
}
166166

167167
static inline bool folio_is_device_private(const struct folio *folio)
@@ -173,13 +173,13 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
173173
{
174174
return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
175175
is_zone_device_page(page) &&
176-
page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
176+
page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA;
177177
}
178178

179179
static inline bool is_device_coherent_page(const struct page *page)
180180
{
181181
return is_zone_device_page(page) &&
182-
page->pgmap->type == MEMORY_DEVICE_COHERENT;
182+
page_pgmap(page)->type == MEMORY_DEVICE_COHERENT;
183183
}
184184

185185
static inline bool folio_is_device_coherent(const struct folio *folio)

include/linux/migrate.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ struct migrate_vma {
205205
unsigned long end;
206206

207207
/*
208-
* Set to the owner value also stored in page->pgmap->owner for
209-
* migrating out of device private memory. The flags also need to
208+
* Set to the owner value also stored in page_pgmap(page)->owner
209+
* for migrating out of device private memory. The flags also need to
210210
* be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE.
211211
* The caller should always set this field when using mmu notifier
212212
* callbacks to avoid device MMU invalidations for device private

include/linux/mm_types.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,11 @@ struct page {
130130
unsigned long compound_head; /* Bit zero is set */
131131
};
132132
struct { /* ZONE_DEVICE pages */
133-
/** @pgmap: Points to the hosting device page map. */
134-
struct dev_pagemap *pgmap;
133+
/*
134+
* The first word is used for compound_head or folio
135+
* pgmap
136+
*/
137+
void *_unused_pgmap_compound_head;
135138
void *zone_device_data;
136139
/*
137140
* ZONE_DEVICE private pages are counted as being
@@ -300,6 +303,7 @@ typedef struct {
300303
* @_refcount: Do not access this member directly. Use folio_ref_count()
301304
* to find how many references there are to this folio.
302305
* @memcg_data: Memory Control Group data.
306+
* @pgmap: Metadata for ZONE_DEVICE mappings
303307
* @virtual: Virtual address in the kernel direct map.
304308
* @_last_cpupid: IDs of last CPU and last process that accessed the folio.
305309
* @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
@@ -338,6 +342,7 @@ struct folio {
338342
/* private: */
339343
};
340344
/* public: */
345+
struct dev_pagemap *pgmap;
341346
};
342347
struct address_space *mapping;
343348
pgoff_t index;

include/linux/mmzone.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1158,6 +1158,12 @@ static inline bool is_zone_device_page(const struct page *page)
11581158
return page_zonenum(page) == ZONE_DEVICE;
11591159
}
11601160

1161+
static inline struct dev_pagemap *page_pgmap(const struct page *page)
1162+
{
1163+
VM_WARN_ON_ONCE_PAGE(!is_zone_device_page(page), page);
1164+
return page_folio(page)->pgmap;
1165+
}
1166+
11611167
/*
11621168
* Consecutive zone device pages should not be merged into the same sgl
11631169
* or bvec segment with other types of pages or if they belong to different
@@ -1173,7 +1179,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
11731179
return false;
11741180
if (!is_zone_device_page(a))
11751181
return true;
1176-
return a->pgmap == b->pgmap;
1182+
return page_pgmap(a) == page_pgmap(b);
11771183
}
11781184

11791185
extern void memmap_init_zone_device(struct zone *, unsigned long,
@@ -1188,6 +1194,10 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a,
11881194
{
11891195
return true;
11901196
}
1197+
static inline struct dev_pagemap *page_pgmap(const struct page *page)
1198+
{
1199+
return NULL;
1200+
}
11911201
#endif
11921202

11931203
static inline bool folio_is_zone_device(const struct folio *folio)

lib/test_hmm.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ static int dmirror_fops_release(struct inode *inode, struct file *filp)
195195

196196
static struct dmirror_chunk *dmirror_page_to_chunk(struct page *page)
197197
{
198-
return container_of(page->pgmap, struct dmirror_chunk, pagemap);
198+
return container_of(page_pgmap(page), struct dmirror_chunk,
199+
pagemap);
199200
}
200201

201202
static struct dmirror_device *dmirror_page_to_device(struct page *page)

mm/hmm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
248248
* just report the PFN.
249249
*/
250250
if (is_device_private_entry(entry) &&
251-
pfn_swap_entry_to_page(entry)->pgmap->owner ==
251+
page_pgmap(pfn_swap_entry_to_page(entry))->owner ==
252252
range->dev_private_owner) {
253253
cpu_flags = HMM_PFN_VALID;
254254
if (is_writable_device_private_entry(entry))

mm/memory.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4338,6 +4338,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
43384338
vmf->page = pfn_swap_entry_to_page(entry);
43394339
ret = remove_device_exclusive_entry(vmf);
43404340
} else if (is_device_private_entry(entry)) {
4341+
struct dev_pagemap *pgmap;
43414342
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
43424343
/*
43434344
* migrate_to_ram is not yet ready to operate
@@ -4362,7 +4363,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
43624363
*/
43634364
get_page(vmf->page);
43644365
pte_unmap_unlock(vmf->pte, vmf->ptl);
4365-
ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
4366+
pgmap = page_pgmap(vmf->page);
4367+
ret = pgmap->ops->migrate_to_ram(vmf);
43664368
put_page(vmf->page);
43674369
} else if (is_hwpoison_entry(entry)) {
43684370
ret = VM_FAULT_HWPOISON;

mm/memremap.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -458,8 +458,8 @@ EXPORT_SYMBOL_GPL(get_dev_pagemap);
458458

459459
void free_zone_device_folio(struct folio *folio)
460460
{
461-
if (WARN_ON_ONCE(!folio->page.pgmap->ops ||
462-
!folio->page.pgmap->ops->page_free))
461+
if (WARN_ON_ONCE(!folio->pgmap->ops ||
462+
!folio->pgmap->ops->page_free))
463463
return;
464464

465465
mem_cgroup_uncharge(folio);
@@ -486,12 +486,12 @@ void free_zone_device_folio(struct folio *folio)
486486
* to clear folio->mapping.
487487
*/
488488
folio->mapping = NULL;
489-
folio->page.pgmap->ops->page_free(folio_page(folio, 0));
489+
folio->pgmap->ops->page_free(folio_page(folio, 0));
490490

491-
switch (folio->page.pgmap->type) {
491+
switch (folio->pgmap->type) {
492492
case MEMORY_DEVICE_PRIVATE:
493493
case MEMORY_DEVICE_COHERENT:
494-
put_dev_pagemap(folio->page.pgmap);
494+
put_dev_pagemap(folio->pgmap);
495495
break;
496496

497497
case MEMORY_DEVICE_FS_DAX:
@@ -514,7 +514,7 @@ void zone_device_page_init(struct page *page)
514514
* Drivers shouldn't be allocating pages after calling
515515
* memunmap_pages().
516516
*/
517-
WARN_ON_ONCE(!percpu_ref_tryget_live(&page->pgmap->ref));
517+
WARN_ON_ONCE(!percpu_ref_tryget_live(&page_pgmap(page)->ref));
518518
set_page_count(page, 1);
519519
lock_page(page);
520520
}
@@ -523,7 +523,7 @@ EXPORT_SYMBOL_GPL(zone_device_page_init);
523523
#ifdef CONFIG_FS_DAX
524524
bool __put_devmap_managed_folio_refs(struct folio *folio, int refs)
525525
{
526-
if (folio->page.pgmap->type != MEMORY_DEVICE_FS_DAX)
526+
if (folio->pgmap->type != MEMORY_DEVICE_FS_DAX)
527527
return false;
528528

529529
/*

0 commit comments

Comments
 (0)