Skip to content

Commit 95920c2

Browse files
tohojokuba-moo
authored and committed
page_pool: Fix PP_MAGIC_MASK to avoid crashing on some 32-bit arches
Helge reported that the introduction of PP_MAGIC_MASK led to crashes on boot on his 32-bit parisc machine. The cause is that the mask is set too wide, so page_pool_page_is_pp() incurs false positives, which crash the machine. Just disabling the check in page_pool_page_is_pp() would lead to the page_pool code itself malfunctioning; so instead of doing this, this patch changes the define for PP_DMA_INDEX_BITS to avoid mistaking arbitrary kernel pointers for page_pool-tagged pages. The fix relies on the kernel pointers that alias with the pp_magic field always being above PAGE_OFFSET. With this assumption, we can use the lowest set bit of the value of PAGE_OFFSET as the upper bound of the PP_DMA_INDEX_MASK, which should avoid the false positives. Because we cannot rely on PAGE_OFFSET always being a compile-time constant, nor on it always being >0, we fall back to disabling the dma_index storage when there are not enough bits available. This leaves us in the situation we were in before the patch in the Fixes tag, but only on a subset of architecture configurations. This seems to be the best we can do until the transition to page types is complete for page_pool pages. v2: - Make sure there are at least 8 bits available and that the PAGE_OFFSET bit calculation doesn't wrap Link: https://lore.kernel.org/all/aMNJMFa5fDalFmtn@p100/ Fixes: ee62ce7 ("page_pool: Track DMA-mapped pages and unmap them when destroying the pool") Cc: stable@vger.kernel.org # 6.15+ Tested-by: Helge Deller <deller@gmx.de> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> Reviewed-by: Mina Almasry <almasrymina@google.com> Tested-by: Helge Deller <deller@gmx.de> Link: https://patch.msgid.link/20250930114331.675412-1-toke@redhat.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 521405c commit 95920c2

File tree

2 files changed

+66
-32
lines changed

2 files changed

+66
-32
lines changed

include/linux/mm.h

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4159,14 +4159,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
41594159
* since this value becomes part of PP_SIGNATURE; meaning we can just use the
41604160
* space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the
41614161
* lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is
4162-
* 0, we make sure that we leave the two topmost bits empty, as that guarantees
4163-
* we won't mistake a valid kernel pointer for a value we set, regardless of the
4164-
* VMSPLIT setting.
4162+
* 0, we use the lowest bit of PAGE_OFFSET as the boundary if that value is
4163+
* known at compile-time.
41654164
*
4166-
* Altogether, this means that the number of bits available is constrained by
4167-
* the size of an unsigned long (at the upper end, subtracting two bits per the
4168-
* above), and the definition of PP_SIGNATURE (with or without
4169-
* POISON_POINTER_DELTA).
4165+
* If the value of PAGE_OFFSET is not known at compile time, or if it is too
4166+
* small to leave at least 8 bits available above PP_SIGNATURE, we define the
4167+
* number of bits to be 0, which turns off the DMA index tracking altogether
4168+
* (see page_pool_register_dma_index()).
41704169
*/
41714170
#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
41724171
#if POISON_POINTER_DELTA > 0
@@ -4175,8 +4174,13 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
41754174
*/
41764175
#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
41774176
#else
4178-
/* Always leave out the topmost two; see above. */
4179-
#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2)
4177+
/* Use the lowest bit of PAGE_OFFSET if there's at least 8 bits available; see above */
4178+
#define PP_DMA_INDEX_MIN_OFFSET (1 << (PP_DMA_INDEX_SHIFT + 8))
4179+
#define PP_DMA_INDEX_BITS ((__builtin_constant_p(PAGE_OFFSET) && \
4180+
PAGE_OFFSET >= PP_DMA_INDEX_MIN_OFFSET && \
4181+
!(PAGE_OFFSET & (PP_DMA_INDEX_MIN_OFFSET - 1))) ? \
4182+
MIN(32, __ffs(PAGE_OFFSET) - PP_DMA_INDEX_SHIFT) : 0)
4183+
41804184
#endif
41814185

41824186
#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \

net/core/page_pool.c

Lines changed: 53 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -468,11 +468,60 @@ page_pool_dma_sync_for_device(const struct page_pool *pool,
468468
}
469469
}
470470

471+
/*
 * page_pool_register_dma_index() - record a DMA-mapped netmem in the pool.
 *
 * Allocates an ID for the page behind @netmem in pool->dma_mapped (bounded
 * by PP_DMA_INDEX_LIMIT, using @gfp for the allocation) and stores that ID
 * in the netmem via netmem_set_dma_index().
 *
 * Returns 0 on success, and also 0 without doing anything when
 * PP_DMA_INDEX_BITS is 0. Otherwise returns the error code from
 * xa_alloc()/xa_alloc_bh(); any error other than -ENOMEM triggers a
 * one-time warning.
 */
static int page_pool_register_dma_index(struct page_pool *pool,
472+
netmem_ref netmem, gfp_t gfp)
473+
{
474+
int err = 0;
475+
u32 id;
476+
477+
/* No index bits available: tracking is turned off, report success. */
if (unlikely(!PP_DMA_INDEX_BITS))
478+
goto out;
479+
480+
/* Plain xa_alloc() when already in softirq context, _bh variant otherwise. */
if (in_softirq())
481+
err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
482+
PP_DMA_INDEX_LIMIT, gfp);
483+
else
484+
err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
485+
PP_DMA_INDEX_LIMIT, gfp);
486+
if (err) {
487+
WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@");
488+
goto out;
489+
}
490+
491+
/* Remember the allocated ID in the netmem so release can look it up. */
netmem_set_dma_index(netmem, id);
492+
out:
493+
return err;
494+
}
495+
496+
/*
 * page_pool_release_dma_index() - drop a netmem's entry from pool->dma_mapped.
 *
 * Reads the ID stored in @netmem, replaces the matching pool->dma_mapped
 * entry with NULL if it still holds this page, and then resets the stored
 * ID to 0.
 *
 * Returns 0 when the entry was removed, and also 0 without doing anything
 * when PP_DMA_INDEX_BITS is 0. Returns -1 when the netmem carries no ID
 * (ID 0) or when the entry at that ID was not this page; the stored ID is
 * left untouched in both of those cases.
 */
static int page_pool_release_dma_index(struct page_pool *pool,
497+
netmem_ref netmem)
498+
{
499+
struct page *old, *page = netmem_to_page(netmem);
500+
unsigned long id;
501+
502+
/* No index bits available: tracking is turned off, report success. */
if (unlikely(!PP_DMA_INDEX_BITS))
503+
return 0;
504+
505+
id = netmem_get_dma_index(netmem);
506+
/* An ID of 0 is treated as "no index stored". */
if (!id)
507+
return -1;
508+
509+
/* Plain xa_cmpxchg() when already in softirq context, _bh variant otherwise. */
if (in_softirq())
510+
old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0);
511+
else
512+
old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0);
513+
/* The entry did not hold this page, so nothing was removed. */
if (old != page)
514+
return -1;
515+
516+
netmem_set_dma_index(netmem, 0);
517+
518+
return 0;
519+
}
520+
471521
static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp)
472522
{
473523
dma_addr_t dma;
474524
int err;
475-
u32 id;
476525

477526
/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
478527
* since dma_addr_t can be either 32 or 64 bits and does not always fit
@@ -491,18 +540,10 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t g
491540
goto unmap_failed;
492541
}
493542

494-
if (in_softirq())
495-
err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
496-
PP_DMA_INDEX_LIMIT, gfp);
497-
else
498-
err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
499-
PP_DMA_INDEX_LIMIT, gfp);
500-
if (err) {
501-
WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@");
543+
err = page_pool_register_dma_index(pool, netmem, gfp);
544+
if (err)
502545
goto unset_failed;
503-
}
504546

505-
netmem_set_dma_index(netmem, id);
506547
page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
507548

508549
return true;
@@ -680,8 +721,6 @@ void page_pool_clear_pp_info(netmem_ref netmem)
680721
static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool,
681722
netmem_ref netmem)
682723
{
683-
struct page *old, *page = netmem_to_page(netmem);
684-
unsigned long id;
685724
dma_addr_t dma;
686725

687726
if (!pool->dma_map)
@@ -690,15 +729,7 @@ static __always_inline void __page_pool_release_netmem_dma(struct page_pool *poo
690729
*/
691730
return;
692731

693-
id = netmem_get_dma_index(netmem);
694-
if (!id)
695-
return;
696-
697-
if (in_softirq())
698-
old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0);
699-
else
700-
old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0);
701-
if (old != page)
732+
if (page_pool_release_dma_index(pool, netmem))
702733
return;
703734

704735
dma = page_pool_get_dma_addr_netmem(netmem);
@@ -708,7 +739,6 @@ static __always_inline void __page_pool_release_netmem_dma(struct page_pool *poo
708739
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
709740
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
710741
page_pool_set_dma_addr_netmem(netmem, 0);
711-
netmem_set_dma_index(netmem, 0);
712742
}
713743

714744
/* Disconnects a page (from a page_pool). API users can have a need

0 commit comments

Comments
 (0)