page_pool: Move pp_magic check into helper functions

jira LE-3187
Rebuild_History Non-Buildable kernel-5.14.0-570.19.1.el9_6
commit-author Toke Høiland-Jørgensen <toke@redhat.com>
commit cd3c93167da0e760b5819246eae7a4ea30fd014b
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-5.14.0-570.19.1.el9_6/cd3c9316.failed

Since we are about to stash some more information into the pp_magic
field, let's move the magic signature checks into a pair of helper
functions so they can be changed in one place.
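
For reference, the helper introduced upstream looks roughly like the sketch
below, reconstructed from the conflicted include/linux/mm.h hunk further down
(the matching netmem_is_pp() wrapper in net/core/netmem_priv.h is not
reproduced here):

	/* Bits 0 (compound head) and 1 (pfmemalloc) are preserved when
	 * pp_magic is OR'ed with PP_SIGNATURE at allocation time, so mask
	 * them off before comparing against the signature.
	 */
	#define PP_MAGIC_MASK ~0x3UL

	#ifdef CONFIG_PAGE_POOL
	static inline bool page_pool_page_is_pp(struct page *page)
	{
		return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
	}
	#else
	static inline bool page_pool_page_is_pp(struct page *page)
	{
		return false;
	}
	#endif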

 Reviewed-by: Mina Almasry <almasrymina@google.com>
 Tested-by: Yonglong Liu <liuyonglong@huawei.com>
 Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
 Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
 Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-1-6a9ef2e0cba8@redhat.com
 Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit cd3c93167da0e760b5819246eae7a4ea30fd014b)
 Signed-off-by: Jonathan Maple <jmaple@ciq.com>

# Conflicts:
#	include/linux/mm.h
#	mm/page_alloc.c
#	net/core/netmem_priv.h
#	net/core/skbuff.c
diff --cc include/linux/mm.h
index 5d9b789ea0a7,56c47f4a38ca..000000000000
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@@ -4138,9 -4198,74 +4138,79 @@@ static inline void accept_memory(phys_a

 static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
 {
 - return range_contains_unaccepted_memory(pfn << PAGE_SHIFT, PAGE_SIZE);
 + phys_addr_t paddr = pfn << PAGE_SHIFT;
 +
 + return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE);
 }

++<<<<<<< HEAD
++=======
+ void vma_pgtable_walk_begin(struct vm_area_struct *vma);
+ void vma_pgtable_walk_end(struct vm_area_struct *vma);
+
+ int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
+ int reserve_mem_release_by_name(const char *name);
+
+ #ifdef CONFIG_64BIT
+ int do_mseal(unsigned long start, size_t len_in, unsigned long flags);
+ #else
+ static inline int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
+ {
+ /* noop on 32 bit */
+ return 0;
+ }
+ #endif
+
+ /*
+ * user_alloc_needs_zeroing checks if a user folio from page allocator needs to
+ * be zeroed or not.
+ */
+ static inline bool user_alloc_needs_zeroing(void)
+ {
+ /*
+ * for user folios, arch with cache aliasing requires cache flush and
+ * arc changes folio->flags to make icache coherent with dcache, so
+ * always return false to make caller use
+ * clear_user_page()/clear_user_highpage().
+ */
+ return cpu_dcache_is_aliasing() || cpu_icache_is_aliasing() ||
+ !static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
+ &init_on_alloc);
+ }
+
+ int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status);
+ int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status);
+ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
+
+
+ /*
+ * mseal of userspace process's system mappings.
+ */
+ #ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS
+ #define VM_SEALED_SYSMAP VM_SEALED
+ #else
+ #define VM_SEALED_SYSMAP VM_NONE
+ #endif
+
+ /* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
+ * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
+ * the head page of compound page and bit 1 for pfmemalloc page.
+ * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling
+ * the pfmemalloc page.
+ */
+ #define PP_MAGIC_MASK ~0x3UL
+
+ #ifdef CONFIG_PAGE_POOL
+ static inline bool page_pool_page_is_pp(struct page *page)
+ {
+ return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
+ }
+ #else
+ static inline bool page_pool_page_is_pp(struct page *page)
+ {
+ return false;
+ }
+ #endif
+
++>>>>>>> cd3c93167da0 (page_pool: Move pp_magic check into helper functions)
 #endif /* _LINUX_MM_H */
diff --cc mm/page_alloc.c
index aa2cbab0e18e,a18340b32218..000000000000
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@@ -919,6 -897,7 +919,10 @@@ static inline bool page_expected_state(
 #ifdef CONFIG_MEMCG
 page->memcg_data |
 #endif
++<<<<<<< HEAD
++=======
+ page_pool_page_is_pp(page) |
++>>>>>>> cd3c93167da0 (page_pool: Move pp_magic check into helper functions)
 (page->flags & check_flags)))
 return false;

@@@ -945,6 -924,8 +949,11 @@@ static const char *page_bad_reason(stru
 if (unlikely(page->memcg_data))
 bad_reason = "page still charged to cgroup";
 #endif
++<<<<<<< HEAD
++=======
+ if (unlikely(page_pool_page_is_pp(page)))
+ bad_reason = "page_pool leak";
++>>>>>>> cd3c93167da0 (page_pool: Move pp_magic check into helper functions)
 return bad_reason;
 }

diff --cc net/core/skbuff.c
index ad137d532882,74a2d886a35b..000000000000
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@@ -855,22 -893,107 +855,32 @@@ static void skb_clone_fraglist(struct s
 skb_get(list);
 }

++<<<<<<< HEAD
++=======
+ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
+ unsigned int headroom)
+ {
++>>>>>>> cd3c93167da0 (page_pool: Move pp_magic check into helper functions)
 #if IS_ENABLED(CONFIG_PAGE_POOL)
 - u32 size, truesize, len, max_head_size, off;
 - struct sk_buff *skb = *pskb, *nskb;
 - int err, i, head_off;
 - void *data;
 -
 - /* XDP does not support fraglist so we need to linearize
 - * the skb.
 - */
 - if (skb_has_frag_list(skb))
 - return -EOPNOTSUPP;
 -
 - max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - headroom);
 - if (skb->len > max_head_size + MAX_SKB_FRAGS * PAGE_SIZE)
 - return -ENOMEM;
 -
 - size = min_t(u32, skb->len, max_head_size);
 - truesize = SKB_HEAD_ALIGN(size) + headroom;
 - data = page_pool_dev_alloc_va(pool, &truesize);
 - if (!data)
 - return -ENOMEM;
 -
 - nskb = napi_build_skb(data, truesize);
 - if (!nskb) {
 - page_pool_free_va(pool, data, true);
 - return -ENOMEM;
 - }
 -
 - skb_reserve(nskb, headroom);
 - skb_copy_header(nskb, skb);
 - skb_mark_for_recycle(nskb);
 -
 - err = skb_copy_bits(skb, 0, nskb->data, size);
 - if (err) {
 - consume_skb(nskb);
 - return err;
 - }
 - skb_put(nskb, size);
 -
 - head_off = skb_headroom(nskb) - skb_headroom(skb);
 - skb_headers_offset_update(nskb, head_off);
 -
 - off = size;
 - len = skb->len - off;
 - for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
 - struct page *page;
 - u32 page_off;
 -
 - size = min_t(u32, len, PAGE_SIZE);
 - truesize = size;
 -
 - page = page_pool_dev_alloc(pool, &page_off, &truesize);
 - if (!page) {
 - consume_skb(nskb);
 - return -ENOMEM;
 - }
 -
 - skb_add_rx_frag(nskb, i, page, page_off, size, truesize);
 - err = skb_copy_bits(skb, off, page_address(page) + page_off,
 - size);
 - if (err) {
 - consume_skb(nskb);
 - return err;
 - }
 -
 - len -= size;
 - off += size;
 - }
 -
 - consume_skb(skb);
 - *pskb = nskb;
 -
 - return 0;
 -#else
 - return -EOPNOTSUPP;
 -#endif
 -}
 -EXPORT_SYMBOL(skb_pp_cow_data);
 -
 -int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
 - const struct bpf_prog *prog)
 -{
 - if (!prog->aux->xdp_has_frags)
 - return -EINVAL;
 -
 - return skb_pp_cow_data(pool, pskb, XDP_PACKET_HEADROOM);
 -}
 -EXPORT_SYMBOL(skb_cow_data_for_xdp);
 -
 -#if IS_ENABLED(CONFIG_PAGE_POOL)
 -bool napi_pp_put_page(netmem_ref netmem)
 +bool napi_pp_put_page(struct page *page)
 {
 - netmem = netmem_compound_head(netmem);
 + page = compound_head(page);

++<<<<<<< HEAD
 + /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
 + * in order to preserve any existing bits, such as bit 0 for the
 + * head page of compound page and bit 1 for pfmemalloc page, so
 + * mask those bits for freeing side when doing below checking,
 + * and page_is_pfmemalloc() is checked in __page_pool_put_page()
 + * to avoid recycling the pfmemalloc page.
 + */
 + if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
++=======
+ if (unlikely(!netmem_is_pp(netmem)))
++>>>>>>> cd3c93167da0 (page_pool: Move pp_magic check into helper functions)
 return false;

 - page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
 + page_pool_put_full_page(page->pp, page, false);

 return true;
 }
@@@ -881,7 -1004,46 +891,50 @@@ static bool skb_pp_recycle(struct sk_bu
 {
 if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
 return false;
++<<<<<<< HEAD
 + return napi_pp_put_page(virt_to_page(data));
++=======
+ return napi_pp_put_page(page_to_netmem(virt_to_page(data)));
+ }
+
+ /**
+ * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
+ * @skb: page pool aware skb
+ *
+ * Increase the fragment reference count (pp_ref_count) of a skb. This is
+ * intended to gain fragment references only for page pool aware skbs,
+ * i.e. when skb->pp_recycle is true, and not for fragments in a
+ * non-pp-recycling skb. It has a fallback to increase references on normal
+ * pages, as page pool aware skbs may also have normal page fragments.
+ */
+ static int skb_pp_frag_ref(struct sk_buff *skb)
+ {
+ struct skb_shared_info *shinfo;
+ netmem_ref head_netmem;
+ int i;
+
+ if (!skb->pp_recycle)
+ return -EINVAL;
+
+ shinfo = skb_shinfo(skb);
+
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ head_netmem = netmem_compound_head(shinfo->frags[i].netmem);
+ if (likely(netmem_is_pp(head_netmem)))
+ page_pool_ref_netmem(head_netmem);
+ else
+ page_ref_inc(netmem_to_page(head_netmem));
+ }
+ return 0;
+ }
+
+ static void skb_kfree_head(void *head, unsigned int end_offset)
+ {
+ if (end_offset == SKB_SMALL_HEAD_HEADROOM)
+ kmem_cache_free(net_hotdata.skb_small_head_cache, head);
+ else
+ kfree(head);
++>>>>>>> cd3c93167da0 (page_pool: Move pp_magic check into helper functions)
 }

 static void skb_free_head(struct sk_buff *skb)
* Unmerged path net/core/netmem_priv.h
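
The net/core/netmem_priv.h side of the upstream commit also failed to apply.
As a rough sketch only, assuming the upstream helper is built on the existing
netmem_get_pp_magic() accessor and the PP_MAGIC_MASK introduced above, the
netmem variant performs the same test on a netmem_ref:

	static inline bool netmem_is_pp(netmem_ref netmem)
	{
		return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE;
	}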
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index bb7a60a3251b..9ac1f586b17b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -655,8 +655,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
 page = xdpi.page.page;

- /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
- * as we know this is a page_pool page.
+ /* No need to check page_pool_page_is_pp() as we
+ * know this is a page_pool page.
 */
 page_pool_recycle_direct(page->pp, page);
 } while (++n < num);
* Unmerged path include/linux/mm.h
* Unmerged path mm/page_alloc.c
* Unmerged path net/core/netmem_priv.h
* Unmerged path net/core/skbuff.c
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 04d61dc16544..4836433b5033 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -380,8 +380,8 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 page = virt_to_head_page(data);
 if (napi_direct && xdp_return_frame_no_direct())
 napi_direct = false;
- /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
- * as mem->type knows this a page_pool page
+ /* No need to check netmem_is_pp() as mem->type knows this a
+ * page_pool page
 */
 page_pool_put_full_page(page->pp, page, napi_direct);
 break;
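
As a sketch of the intended end state for this tree (HEAD keeps the page-based
napi_pp_put_page() shown in the skbuff.c hunk above, while upstream performs
the same test via netmem_is_pp() on a netmem_ref), the open-coded check

	if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
		return false;

collapses to a single helper call:

	if (unlikely(!page_pool_page_is_pp(page)))
		return false;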