Skip to content

Commit 62a2ff7

Browse files
committed
dm-bufio: remove maximum age based eviction
JIRA: https://issues.redhat.com/browse/RHEL-119009 Upstream Status: kernel/git/torvalds/linux.git commit 9769378 Author: Eric Biggers <ebiggers@google.com> Date: Tue Apr 22 13:07:35 2025 -0700 dm-bufio: remove maximum age based eviction Every 30 seconds, dm-bufio evicts all buffers that were not accessed within the last max_age_seconds, except those pinned in memory via retain_bytes. By default max_age_seconds is 300 (i.e. 5 minutes), and retain_bytes is 262144 (i.e. 256 KiB) per dm-bufio client. This eviction algorithm is much too eager and is also redundant with the shrinker based eviction. Testing on an Android phone shows that about 30 MB of dm-bufio buffers (from dm-verity Merkle tree blocks) are loaded at boot time, and then about 90% of them are suddenly thrown away 5 minutes after boot. This results in unnecessary Merkle tree I/O later. Meanwhile, if the system actually encounters memory pressure, testing also shows that the shrinker is effective at evicting the buffers. Other major Linux kernel caches, such as the page cache, do not enforce a maximum age, instead relying on the shrinker. For these reasons, Android is now setting max_age_seconds to 86400 (i.e. 1 day), which mostly disables it; see https://android.googlesource.com/platform/system/core/+/cadad290a79d5b0a30add935aaadab7c1b1ef5e9%5E%21/ That is a much better default, but really the maximum age based eviction should not exist at all. Let's remove it. Note that this also eliminates the need to run work every 30 seconds, which is beneficial too. Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
1 parent eec6973 commit 62a2ff7

File tree

1 file changed

+36
-153
lines changed

1 file changed

+36
-153
lines changed

drivers/md/dm-bufio.c

Lines changed: 36 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,6 @@
4040
#define DM_BUFIO_WRITEBACK_RATIO 3
4141
#define DM_BUFIO_LOW_WATERMARK_RATIO 16
4242

43-
/*
44-
* Check buffer ages in this interval (seconds)
45-
*/
46-
#define DM_BUFIO_WORK_TIMER_SECS 30
47-
48-
/*
49-
* Free buffers when they are older than this (seconds)
50-
*/
51-
#define DM_BUFIO_DEFAULT_AGE_SECS 300
52-
5343
/*
5444
* The nr of bytes of cached data to keep around.
5545
*/
@@ -1057,10 +1047,8 @@ static unsigned long dm_bufio_cache_size_latch;
10571047

10581048
static DEFINE_SPINLOCK(global_spinlock);
10591049

1060-
/*
1061-
* Buffers are freed after this timeout
1062-
*/
1063-
static unsigned int dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
1050+
static unsigned int dm_bufio_max_age; /* No longer does anything */
1051+
10641052
static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
10651053

10661054
static unsigned long dm_bufio_peak_allocated;
@@ -1088,7 +1076,6 @@ static LIST_HEAD(dm_bufio_all_clients);
10881076
static DEFINE_MUTEX(dm_bufio_clients_lock);
10891077

10901078
static struct workqueue_struct *dm_bufio_wq;
1091-
static struct delayed_work dm_bufio_cleanup_old_work;
10921079
static struct work_struct dm_bufio_replacement_work;
10931080

10941081

@@ -2680,130 +2667,6 @@ EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
26802667

26812668
/*--------------------------------------------------------------*/
26822669

2683-
/*
* Return the dm_bufio_max_age setting converted to jiffies (value * HZ).
*
* The value is clamped to UINT_MAX / HZ before the multiplication so that
* the result cannot overflow an unsigned int.
*/
static unsigned int get_max_age_hz(void)
2684-
{
2685-
unsigned int max_age = READ_ONCE(dm_bufio_max_age);
2686-
2687-
if (max_age > UINT_MAX / HZ)
2688-
max_age = UINT_MAX / HZ;
2689-
2690-
return max_age * HZ;
2691-
}
2692-
2693-
/*
* Return true if the current jiffies value is at or after
* b->last_accessed + age_hz, i.e. the buffer has not been accessed for at
* least age_hz jiffies.
*/
static bool older_than(struct dm_buffer *b, unsigned long age_hz)
2694-
{
2695-
return time_after_eq(jiffies, READ_ONCE(b->last_accessed) + age_hz);
2696-
}
2697-
2698-
/*
* Parameters shared by select_for_evict() and __evict_many().
*/
struct evict_params {
2699-
/* Allocation context; select_for_evict() tests it for __GFP_FS. */
gfp_t gfp;
2700-
/* Minimum buffer age, in jiffies, handed to older_than(). */
unsigned long age_hz;
2701-
2702-
/*
2703-
* This gets updated with the earliest last_accessed (ie. least
2704-
* recently used) of the evicted buffers. It will not be reinitialised
2705-
* by __evict_many(), so you can use it across multiple invocations.
2706-
*/
2707-
unsigned long last_accessed;
2708-
};
2709-
2710-
/*
2711-
* We may not be able to evict this buffer if IO pending or the client
2712-
* is still using it.
2713-
*
2714-
* And if GFP_NOFS is used, we must not do any I/O because we hold
2715-
* dm_bufio_clients_lock and we would risk deadlock if the I/O gets
2716-
* rerouted to different bufio client.
*
* context points to a struct evict_params.
*
* Returns ER_DONT_EVICT when I/O is not permitted (no __GFP_FS in
* params->gfp, or the client is in no-sleep mode) and the buffer is being
* read, being written or is dirty. Otherwise returns ER_EVICT if the
* buffer is at least params->age_hz old, and ER_STOP if it is not.
2717-
*/
2718-
static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
2719-
{
2720-
struct evict_params *params = context;
2721-
2722-
if (!(params->gfp & __GFP_FS) ||
2723-
(static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) {
2724-
if (test_bit_acquire(B_READING, &b->state) ||
2725-
test_bit(B_WRITING, &b->state) ||
2726-
test_bit(B_DIRTY, &b->state))
2727-
return ER_DONT_EVICT;
2728-
}
2729-
2730-
return older_than(b, params->age_hz) ? ER_EVICT : ER_STOP;
2731-
}
2732-
2733-
/*
* Evict up to max_count buffers from the list_mode list of client c's
* cache, using select_for_evict() with params as the selection context.
*
* Each evicted buffer is passed to __make_buffer_clean() and then
* __free_buffer_wake(). params->last_accessed is lowered to the earliest
* last_accessed value seen among the evicted buffers.
*
* Returns the number of buffers evicted; the loop stops early when
* cache_evict() returns NULL.
*/
static unsigned long __evict_many(struct dm_bufio_client *c,
2734-
struct evict_params *params,
2735-
int list_mode, unsigned long max_count)
2736-
{
2737-
unsigned long count;
2738-
unsigned long last_accessed;
2739-
struct dm_buffer *b;
2740-
2741-
for (count = 0; count < max_count; count++) {
2742-
b = cache_evict(&c->cache, list_mode, select_for_evict, params);
2743-
if (!b)
2744-
break;
2745-
2746-
last_accessed = READ_ONCE(b->last_accessed);
/* Keep the earliest timestamp: replace only if ours is not older. */
2747-
if (time_after_eq(params->last_accessed, last_accessed))
2748-
params->last_accessed = last_accessed;
2749-
2750-
__make_buffer_clean(b);
2751-
__free_buffer_wake(b);
2752-
2753-
cond_resched();
2754-
}
2755-
2756-
return count;
2757-
}
2758-
2759-
/*
* Evict buffers from client c's LIST_CLEAN list that are at least age_hz
* jiffies old, without evicting more than the amount by which the cache
* total exceeds get_retain_buffers(c).
*
* Takes and releases the client lock via dm_bufio_lock()/dm_bufio_unlock().
*/
static void evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
2760-
{
2761-
struct evict_params params = {.gfp = 0, .age_hz = age_hz, .last_accessed = 0};
2762-
unsigned long retain = get_retain_buffers(c);
2763-
unsigned long count;
2764-
LIST_HEAD(write_list);
2765-
2766-
dm_bufio_lock(c);
2767-
2768-
__check_watermark(c, &write_list);
/* Flush anything __check_watermark() queued, with the lock dropped. */
2769-
if (unlikely(!list_empty(&write_list))) {
2770-
dm_bufio_unlock(c);
2771-
__flush_write_list(&write_list);
2772-
dm_bufio_lock(c);
2773-
}
2774-
2775-
count = cache_total(&c->cache);
/* Only the excess over the retain threshold is eligible for eviction. */
2776-
if (count > retain)
2777-
__evict_many(c, &params, LIST_CLEAN, count - retain);
2778-
2779-
dm_bufio_unlock(c);
2780-
}
2781-
2782-
/*
* Run evict_old_buffers() on every client on dm_bufio_all_clients, using
* the maximum age returned by get_max_age_hz().
*
* dm_bufio_clients_lock is held for the whole walk, and
* __cache_size_refresh() is called before the clients are visited.
*/
static void cleanup_old_buffers(void)
2783-
{
2784-
unsigned long max_age_hz = get_max_age_hz();
2785-
struct dm_bufio_client *c;
2786-
2787-
mutex_lock(&dm_bufio_clients_lock);
2788-
2789-
__cache_size_refresh();
2790-
2791-
list_for_each_entry(c, &dm_bufio_all_clients, client_list)
2792-
evict_old_buffers(c, max_age_hz);
2793-
2794-
mutex_unlock(&dm_bufio_clients_lock);
2795-
}
2796-
2797-
/*
* Handler for dm_bufio_cleanup_old_work: runs cleanup_old_buffers() and
* then re-queues the same delayed work on dm_bufio_wq with a delay of
* DM_BUFIO_WORK_TIMER_SECS seconds, so the cleanup repeats periodically.
* The w argument is not used.
*/
static void work_fn(struct work_struct *w)
2798-
{
2799-
cleanup_old_buffers();
2800-
2801-
queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
2802-
DM_BUFIO_WORK_TIMER_SECS * HZ);
2803-
}
2804-
2805-
/*--------------------------------------------------------------*/
2806-
28072670
/*
28082671
* Global cleanup tries to evict the oldest buffers from across _all_
28092672
* the clients. It does this by repeatedly evicting a few buffers from
@@ -2841,27 +2704,51 @@ static void __insert_client(struct dm_bufio_client *new_client)
28412704
list_add_tail(&new_client->client_list, h);
28422705
}
28432706

2707+
/*
* Eviction predicate handed to cache_evict().
*
* context is not used. Returns ER_DONT_EVICT when the buffer's client is
* in no-sleep mode and the buffer is being read, being written or is
* dirty; returns ER_EVICT in every other case.
*/
static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
2708+
{
2709+
/* In no-sleep mode, we cannot wait on IO. */
2710+
if (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep) {
2711+
if (test_bit_acquire(B_READING, &b->state) ||
2712+
test_bit(B_WRITING, &b->state) ||
2713+
test_bit(B_DIRTY, &b->state))
2714+
return ER_DONT_EVICT;
2715+
}
2716+
return ER_EVICT;
2717+
}
2718+
28442719
static unsigned long __evict_a_few(unsigned long nr_buffers)
28452720
{
2846-
unsigned long count;
28472721
struct dm_bufio_client *c;
2848-
struct evict_params params = {
2849-
.gfp = GFP_KERNEL,
2850-
.age_hz = 0,
2851-
/* set to jiffies in case there are no buffers in this client */
2852-
.last_accessed = jiffies
2853-
};
2722+
unsigned long oldest_buffer = jiffies;
2723+
unsigned long last_accessed;
2724+
unsigned long count;
2725+
struct dm_buffer *b;
28542726

28552727
c = __pop_client();
28562728
if (!c)
28572729
return 0;
28582730

28592731
dm_bufio_lock(c);
2860-
count = __evict_many(c, &params, LIST_CLEAN, nr_buffers);
2732+
2733+
for (count = 0; count < nr_buffers; count++) {
2734+
b = cache_evict(&c->cache, LIST_CLEAN, select_for_evict, NULL);
2735+
if (!b)
2736+
break;
2737+
2738+
last_accessed = READ_ONCE(b->last_accessed);
2739+
if (time_after_eq(oldest_buffer, last_accessed))
2740+
oldest_buffer = last_accessed;
2741+
2742+
__make_buffer_clean(b);
2743+
__free_buffer_wake(b);
2744+
2745+
cond_resched();
2746+
}
2747+
28612748
dm_bufio_unlock(c);
28622749

28632750
if (count)
2864-
c->oldest_buffer = params.last_accessed;
2751+
c->oldest_buffer = oldest_buffer;
28652752
__insert_client(c);
28662753

28672754
return count;
@@ -2944,10 +2831,7 @@ static int __init dm_bufio_init(void)
29442831
if (!dm_bufio_wq)
29452832
return -ENOMEM;
29462833

2947-
INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
29482834
INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
2949-
queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
2950-
DM_BUFIO_WORK_TIMER_SECS * HZ);
29512835

29522836
return 0;
29532837
}
@@ -2959,7 +2843,6 @@ static void __exit dm_bufio_exit(void)
29592843
{
29602844
int bug = 0;
29612845

2962-
cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
29632846
destroy_workqueue(dm_bufio_wq);
29642847

29652848
if (dm_bufio_client_count) {
@@ -2996,7 +2879,7 @@ module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, 0644);
29962879
MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
29972880

29982881
module_param_named(max_age_seconds, dm_bufio_max_age, uint, 0644);
2999-
MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
2882+
MODULE_PARM_DESC(max_age_seconds, "No longer does anything");
30002883

30012884
module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, 0644);
30022885
MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");

0 commit comments

Comments
 (0)