@@ -88,6 +88,9 @@ typedef int __bitwise fpi_t;
8888 */
8989#define FPI_TO_TAIL ((__force fpi_t)BIT(1))
9090
/*
 * Free the page without spinning on any lock: only trylock is attempted.
 * If zone->lock cannot be taken, the page is queued on the zone's
 * trylock_free_pages list instead of being freed right away.
 */
#define FPI_TRYLOCK		((__force fpi_t)BIT(2))
93+
9194/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
9295static DEFINE_MUTEX (pcp_batch_high_lock );
9396#define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)
@@ -1249,13 +1252,44 @@ static void split_large_buddy(struct zone *zone, struct page *page,
12491252 } while (1 );
12501253}
12511254
1255+ static void add_page_to_zone_llist (struct zone * zone , struct page * page ,
1256+ unsigned int order )
1257+ {
1258+ /* Remember the order */
1259+ page -> order = order ;
1260+ /* Add the page to the free list */
1261+ llist_add (& page -> pcp_llist , & zone -> trylock_free_pages );
1262+ }
1263+
12521264static void free_one_page (struct zone * zone , struct page * page ,
12531265 unsigned long pfn , unsigned int order ,
12541266 fpi_t fpi_flags )
12551267{
1268+ struct llist_head * llhead ;
12561269 unsigned long flags ;
12571270
1258- spin_lock_irqsave (& zone -> lock , flags );
1271+ if (!spin_trylock_irqsave (& zone -> lock , flags )) {
1272+ if (unlikely (fpi_flags & FPI_TRYLOCK )) {
1273+ add_page_to_zone_llist (zone , page , order );
1274+ return ;
1275+ }
1276+ spin_lock_irqsave (& zone -> lock , flags );
1277+ }
1278+
1279+ /* The lock succeeded. Process deferred pages. */
1280+ llhead = & zone -> trylock_free_pages ;
1281+ if (unlikely (!llist_empty (llhead ) && !(fpi_flags & FPI_TRYLOCK ))) {
1282+ struct llist_node * llnode ;
1283+ struct page * p , * tmp ;
1284+
1285+ llnode = llist_del_all (llhead );
1286+ llist_for_each_entry_safe (p , tmp , llnode , pcp_llist ) {
1287+ unsigned int p_order = p -> order ;
1288+
1289+ split_large_buddy (zone , p , page_to_pfn (p ), p_order , fpi_flags );
1290+ __count_vm_events (PGFREE , 1 << p_order );
1291+ }
1292+ }
12591293 split_large_buddy (zone , page , pfn , order , fpi_flags );
12601294 spin_unlock_irqrestore (& zone -> lock , flags );
12611295
@@ -2599,7 +2633,7 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
25992633
26002634static void free_frozen_page_commit (struct zone * zone ,
26012635 struct per_cpu_pages * pcp , struct page * page , int migratetype ,
2602- unsigned int order )
2636+ unsigned int order , fpi_t fpi_flags )
26032637{
26042638 int high , batch ;
26052639 int pindex ;
@@ -2634,6 +2668,14 @@ static void free_frozen_page_commit(struct zone *zone,
26342668 }
26352669 if (pcp -> free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX ))
26362670 pcp -> free_count += (1 << order );
2671+
2672+ if (unlikely (fpi_flags & FPI_TRYLOCK )) {
2673+ /*
2674+ * Do not attempt to take a zone lock. Let pcp->count get
2675+ * over high mark temporarily.
2676+ */
2677+ return ;
2678+ }
26372679 high = nr_pcp_high (pcp , zone , batch , free_high );
26382680 if (pcp -> count >= high ) {
26392681 free_pcppages_bulk (zone , nr_pcp_free (pcp , batch , high , free_high ),
@@ -2648,7 +2690,8 @@ static void free_frozen_page_commit(struct zone *zone,
26482690/*
26492691 * Free a pcp page
26502692 */
2651- void free_frozen_pages (struct page * page , unsigned int order )
2693+ static void __free_frozen_pages (struct page * page , unsigned int order ,
2694+ fpi_t fpi_flags )
26522695{
26532696 unsigned long __maybe_unused UP_flags ;
26542697 struct per_cpu_pages * pcp ;
@@ -2657,7 +2700,7 @@ void free_frozen_pages(struct page *page, unsigned int order)
26572700 int migratetype ;
26582701
26592702 if (!pcp_allowed_order (order )) {
2660- __free_pages_ok (page , order , FPI_NONE );
2703+ __free_pages_ok (page , order , fpi_flags );
26612704 return ;
26622705 }
26632706
@@ -2675,23 +2718,33 @@ void free_frozen_pages(struct page *page, unsigned int order)
26752718 migratetype = get_pfnblock_migratetype (page , pfn );
26762719 if (unlikely (migratetype >= MIGRATE_PCPTYPES )) {
26772720 if (unlikely (is_migrate_isolate (migratetype ))) {
2678- free_one_page (zone , page , pfn , order , FPI_NONE );
2721+ free_one_page (zone , page , pfn , order , fpi_flags );
26792722 return ;
26802723 }
26812724 migratetype = MIGRATE_MOVABLE ;
26822725 }
26832726
2727+ if (unlikely ((fpi_flags & FPI_TRYLOCK ) && IS_ENABLED (CONFIG_PREEMPT_RT )
2728+ && (in_nmi () || in_hardirq ()))) {
2729+ add_page_to_zone_llist (zone , page , order );
2730+ return ;
2731+ }
26842732 pcp_trylock_prepare (UP_flags );
26852733 pcp = pcp_spin_trylock (zone -> per_cpu_pageset );
26862734 if (pcp ) {
2687- free_frozen_page_commit (zone , pcp , page , migratetype , order );
2735+ free_frozen_page_commit (zone , pcp , page , migratetype , order , fpi_flags );
26882736 pcp_spin_unlock (pcp );
26892737 } else {
2690- free_one_page (zone , page , pfn , order , FPI_NONE );
2738+ free_one_page (zone , page , pfn , order , fpi_flags );
26912739 }
26922740 pcp_trylock_finish (UP_flags );
26932741}
26942742
2743+ void free_frozen_pages (struct page * page , unsigned int order )
2744+ {
2745+ __free_frozen_pages (page , order , FPI_NONE );
2746+ }
2747+
26952748/*
26962749 * Free a batch of folios
26972750 */
@@ -2780,7 +2833,7 @@ void free_unref_folios(struct folio_batch *folios)
27802833
27812834 trace_mm_page_free_batched (& folio -> page );
27822835 free_frozen_page_commit (zone , pcp , & folio -> page , migratetype ,
2783- order );
2836+ order , FPI_NONE );
27842837 }
27852838
27862839 if (pcp ) {
@@ -4822,9 +4875,10 @@ unsigned long get_zeroed_page_noprof(gfp_t gfp_mask)
48224875EXPORT_SYMBOL (get_zeroed_page_noprof );
48234876
48244877/**
4825- * __free_pages - Free pages allocated with alloc_pages().
4878+ * ___free_pages - Free pages allocated with alloc_pages().
48264879 * @page: The page pointer returned from alloc_pages().
48274880 * @order: The order of the allocation.
4881+ * @fpi_flags: Free Page Internal flags.
48284882 *
48294883 * This function can free multi-page allocations that are not compound
48304884 * pages. It does not check that the @order passed in matches that of
@@ -4841,22 +4895,37 @@ EXPORT_SYMBOL(get_zeroed_page_noprof);
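 * Context: With FPI_NONE, may be called in interrupt context or while holding
 * a normal spinlock, but not in NMI context or while holding a raw spinlock.
 * With FPI_TRYLOCK (see free_pages_nolock()), may also be called from NMI
 * context or while holding a raw spinlock.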
48434897 */
4844- void __free_pages (struct page * page , unsigned int order )
4898+ static void ___free_pages (struct page * page , unsigned int order ,
4899+ fpi_t fpi_flags )
48454900{
48464901 /* get PageHead before we drop reference */
48474902 int head = PageHead (page );
48484903 struct alloc_tag * tag = pgalloc_tag_get (page );
48494904
48504905 if (put_page_testzero (page ))
4851- free_frozen_pages (page , order );
4906+ __free_frozen_pages (page , order , fpi_flags );
48524907 else if (!head ) {
48534908 pgalloc_tag_sub_pages (tag , (1 << order ) - 1 );
48544909 while (order -- > 0 )
4855- free_frozen_pages (page + (1 << order ), order );
4910+ __free_frozen_pages (page + (1 << order ), order ,
4911+ fpi_flags );
48564912 }
48574913}
4914+ void __free_pages (struct page * page , unsigned int order )
4915+ {
4916+ ___free_pages (page , order , FPI_NONE );
4917+ }
48584918EXPORT_SYMBOL (__free_pages );
48594919
4920+ /*
4921+ * Can be called while holding raw_spin_lock or from IRQ and NMI for any
4922+ * page type (not only those that came from try_alloc_pages)
4923+ */
4924+ void free_pages_nolock (struct page * page , unsigned int order )
4925+ {
4926+ ___free_pages (page , order , FPI_TRYLOCK );
4927+ }
4928+
48604929void free_pages (unsigned long addr , unsigned int order )
48614930{
48624931 if (addr != 0 ) {
0 commit comments