@@ -133,8 +133,66 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
 #define NAPI_SKB_CACHE_BULK	16
 #define NAPI_SKB_CACHE_HALF	(NAPI_SKB_CACHE_SIZE / 2)
 
+#if PAGE_SIZE == SZ_4K
+
+#define NAPI_HAS_SMALL_PAGE_FRAG	1
+#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)	((nc).pfmemalloc)
+
+/* Specialized page frag allocator using a single order 0 page
+ * and slicing it into 1K sized fragments. Constrained to systems
+ * where only a very limited number of 1K fragments fit in a single
+ * page - to avoid excessive truesize underestimation.
+ */
+
+struct page_frag_1k {
+	void *va;
+	u16 offset;
+	bool pfmemalloc;
+};
+
+static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
+{
+	struct page *page;
+	int offset;
+
+	offset = nc->offset - SZ_1K;
+	if (likely(offset >= 0))
+		goto use_frag;
+
+	page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+	if (!page)
+		return NULL;
+
+	nc->va = page_address(page);
+	nc->pfmemalloc = page_is_pfmemalloc(page);
+	offset = PAGE_SIZE - SZ_1K;
+	page_ref_add(page, offset / SZ_1K);
+
+use_frag:
+	nc->offset = offset;
+	return nc->va + offset;
+}
+#else
+
+/* the small page is actually unused in this build; add dummy helpers
+ * to please the compiler and avoid preprocessor conditionals later on
+ */
+#define NAPI_HAS_SMALL_PAGE_FRAG	0
+#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)	false
+
+struct page_frag_1k {
+};
+
+static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
+{
+	return NULL;
+}
+
+#endif
+
 struct napi_alloc_cache {
 	struct page_frag_cache page;
+	struct page_frag_1k page_small;
 	unsigned int skb_count;
 	void *skb_cache[NAPI_SKB_CACHE_SIZE];
 };
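For reference, the following is a minimal userspace model (not part of the patch; every identifier here is made up for illustration) of the offset and reference-count bookkeeping that page_frag_alloc_1k() performs: a fresh order-0 page is sliced into four 1K fragments handed out at descending offsets, and the allocator takes one page reference per fragment (the initial reference plus offset / SZ_1K extra ones), so the page is only released once every consumer has dropped its share.

#include <assert.h>
#include <stdlib.h>

#define MODEL_PAGE_SIZE	4096
#define MODEL_SZ_1K	1024

struct model_frag_1k {
	char *va;
	int offset;
	int page_refs;	/* stands in for the struct page refcount */
};

static void *model_alloc_1k(struct model_frag_1k *nc)
{
	int offset = nc->offset - MODEL_SZ_1K;

	if (offset < 0) {
		/* current page exhausted: grab a new one (the old page is
		 * released by its consumers dropping their references)
		 */
		nc->va = malloc(MODEL_PAGE_SIZE);
		if (!nc->va)
			return NULL;
		offset = MODEL_PAGE_SIZE - MODEL_SZ_1K;
		/* one reference per 1K slice this page will serve */
		nc->page_refs = 1 + offset / MODEL_SZ_1K;
	}
	nc->offset = offset;
	return nc->va + offset;
}

int main(void)
{
	struct model_frag_1k nc = { .offset = 0 };
	void *frag;
	int i;

	/* four allocations fit in one page, at offsets 3072, 2048, 1024, 0 */
	for (i = 0; i < 4; i++) {
		frag = model_alloc_1k(&nc);
		assert(frag);
	}
	assert(nc.offset == 0 && nc.page_refs == 4);

	/* the fifth allocation starts a fresh page at the top offset again */
	frag = model_alloc_1k(&nc);
	assert(frag && nc.offset == MODEL_PAGE_SIZE - MODEL_SZ_1K);
	return 0;
}

Because a page can back at most four such fragments, the gap between the 1K accounted per fragment and the full page that a single lingering fragment can pin stays bounded at 4x, which is presumably why the helper is restricted to PAGE_SIZE == SZ_4K builds (see the comment above about truesize underestimation).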
@@ -150,6 +208,23 @@ static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask,
 	return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask);
 }
 
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure the bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+void napi_get_frags_check(struct napi_struct *napi)
+{
+	struct sk_buff *skb;
+
+	local_bh_disable();
+	skb = napi_get_frags(napi);
+	WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
+	napi_free_frags(napi);
+	local_bh_enable();
+}
+
 void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
 {
 	fragsz = SKB_DATA_ALIGN(fragsz);
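As background for the check above: the under-estimation fixed by commit 3226b158e67c came from tiny heads being carved out of the regular page frag cache, whose compound pages are up to PAGE_FRAG_CACHE_MAX_SIZE (32KB on typical configurations), so a single small skb could pin a whole such page. A rough worst-case computation (the constants are assumptions for a common 4K-page build, not values taken from this patch):

#include <stdio.h>

int main(void)
{
	unsigned int frag_cache_page = 32768;	/* PAGE_FRAG_CACHE_MAX_SIZE */
	unsigned int tiny_head_truesize = 1024;	/* accounted size of a tiny head */

	/* one lingering tiny skb can keep the whole 32K compound page alive
	 * while only about 1K shows up in its truesize
	 */
	printf("worst-case pinned/accounted ratio: %ux\n",
	       frag_cache_page / tiny_head_truesize);
	return 0;
}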
@@ -562,14 +637,17 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 {
 	struct napi_alloc_cache *nc;
 	struct sk_buff *skb;
+	bool pfmemalloc;
 	void *data;
 
 	len += NET_SKB_PAD + NET_IP_ALIGN;
 
 	/* If requested length is either too small or too big,
 	 * we use kmalloc() for skb->head allocation.
+	 * When the small frag allocator is available, prefer it over kmalloc
+	 * for small fragments.
 	 */
-	if (len <= SKB_WITH_OVERHEAD(1024) ||
+	if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
 	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
 	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -580,13 +658,33 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	}
 
 	nc = this_cpu_ptr(&napi_alloc_cache);
-	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	len = SKB_DATA_ALIGN(len);
 
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	data = page_frag_alloc(&nc->page, len, gfp_mask);
+	if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
+		/* we are artificially inflating the allocation size, but
+		 * that is not as bad as it may look, as:
+		 * - 'len' less than GRO_MAX_HEAD makes little sense
+		 * - on most systems, larger 'len' values lead to fragment
+		 *   sizes above 512 bytes
+		 * - kmalloc would use the kmalloc-1k slab for such values
+		 * - builds with a smaller GRO_MAX_HEAD will very likely do
+		 *   little networking, as that implies no WiFi and no
+		 *   tunnel support, and 32-bit arches.
+		 */
+		len = SZ_1K;
+
+		data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
+		pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
+	} else {
+		len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+		len = SKB_DATA_ALIGN(len);
+
+		data = page_frag_alloc(&nc->page, len, gfp_mask);
+		pfmemalloc = nc->page.pfmemalloc;
+	}
+
 	if (unlikely(!data))
 		return NULL;
 
@@ -596,7 +694,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 		return NULL;
 	}
 
-	if (nc->page.pfmemalloc)
+	if (pfmemalloc)
 		skb->pfmemalloc = 1;
 	skb->head_frag = 1;
 
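Finally, to see the new small-fragment branch from a consumer's perspective, here is a hypothetical copy-break RX poll routine (illustration only; the driver-side identifiers and the elided frame handling are made up, only the core helpers are real): on a PAGE_SIZE == SZ_4K build, a napi_alloc_skb() request this small is now served from the per-CPU page_frag_1k cache rather than kmalloc, and the resulting skb has head_frag set, as shown at the end of the hunk above.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

#define EXAMPLE_COPYBREAK	256	/* well below SKB_WITH_OVERHEAD(1024) */

static int example_rx_poll(struct napi_struct *napi, int budget)
{
	int done = 0;

	while (done < budget) {
		struct sk_buff *skb;

		/* head comes from the 1K page frag cache on 4K-page builds */
		skb = napi_alloc_skb(napi, EXAMPLE_COPYBREAK);
		if (!skb)
			break;

		/* ... copy the small frame into skb_put() space, set
		 * skb->protocol with eth_type_trans(), etc. ...
		 */

		napi_gro_receive(napi, skb);
		done++;
	}
	return done;
}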