Skip to content

Commit 50ff9eb

Browse files
author
Brian Foster
committed
ext4: do not mark inode dirty every time when appending using delalloc
JIRA: https://issues.redhat.com/browse/RHEL-109217 Conflicts: Work around lack of folio conversion in CS9. commit 03de20b Author: Liu Song <liusong@linux.alibaba.com> Date: Thu Aug 10 23:43:33 2023 +0800 ext4: do not mark inode dirty every time when appending using delalloc In the delalloc append write scenario, if inode's i_size is extended due to buffer write, there are delalloc writes pending in the range up to i_size, and no need to touch i_disksize since writeback will push i_disksize up to i_size eventually. Offers significant performance improvement in high-frequency append write scenarios. I conducted tests in my 32-core environment by launching 32 concurrent threads to append write to the same file. Each write operation had a length of 1024 bytes and was repeated 100000 times. Without using this patch, the test was completed in 7705 ms. However, with this patch, the test was completed in 5066 ms, resulting in a performance improvement of 34%. Moreover, in test scenarios of Kafka version 2.6.2, using packet size of 2K, with this patch resulted in a 10% performance improvement. Signed-off-by: Liu Song <liusong@linux.alibaba.com> Suggested-by: Jan Kara <jack@suse.cz> Reviewed-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/20230810154333.84921-1-liusong@linux.alibaba.com Signed-off-by: Theodore Ts'o <tytso@mit.edu> Signed-off-by: Brian Foster <bfoster@redhat.com>
1 parent f830580 commit 50ff9eb

File tree

1 file changed

+62
-26
lines changed

1 file changed

+62
-26
lines changed

fs/ext4/inode.c

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3147,14 +3147,73 @@ static int ext4_da_should_update_i_disksize(struct page *page,
31473147
return 1;
31483148
}
31493149

3150+
static int ext4_da_do_write_end(struct address_space *mapping,
3151+
loff_t pos, unsigned len, unsigned copied,
3152+
struct page *page)
3153+
{
3154+
struct inode *inode = mapping->host;
3155+
loff_t old_size = inode->i_size;
3156+
bool disksize_changed = false;
3157+
loff_t new_i_size;
3158+
3159+
/*
3160+
* block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
3161+
flag, which is all that's needed to trigger page writeback.
3162+
*/
3163+
copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
3164+
new_i_size = pos + copied;
3165+
3166+
/*
3167+
* It's important to update i_size while still holding page lock,
3168+
* because page writeout could otherwise come in and zero beyond
3169+
* i_size.
3170+
*
3171+
* Since we are holding inode lock, we are sure i_disksize <=
3172+
* i_size. We also know that if i_disksize < i_size, there are
3173+
* delalloc writes pending in the range up to i_size. If the end of
3174+
* the current write is <= i_size, there's no need to touch
3175+
* i_disksize since writeback will push i_disksize up to i_size
3176+
* eventually. If the end of the current write is > i_size and
3177+
* inside an allocated block which ext4_da_should_update_i_disksize()
3178+
* checked, we need to update i_disksize here as certain
3179+
* ext4_writepages() paths that do not allocate blocks do not update i_disksize.
3180+
*/
3181+
if (new_i_size > inode->i_size) {
3182+
unsigned long end;
3183+
3184+
i_size_write(inode, new_i_size);
3185+
end = (new_i_size - 1) & (PAGE_SIZE - 1);
3186+
if (copied && ext4_da_should_update_i_disksize(page, end)) {
3187+
ext4_update_i_disksize(inode, new_i_size);
3188+
disksize_changed = true;
3189+
}
3190+
}
3191+
3192+
unlock_page(page);
3193+
put_page(page);
3194+
3195+
if (old_size < pos)
3196+
pagecache_isize_extended(inode, old_size, pos);
3197+
3198+
if (disksize_changed) {
3199+
handle_t *handle;
3200+
3201+
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3202+
if (IS_ERR(handle))
3203+
return PTR_ERR(handle);
3204+
ext4_mark_inode_dirty(handle, inode);
3205+
ext4_journal_stop(handle);
3206+
}
3207+
3208+
return copied;
3209+
}
3210+
31503211
static int ext4_da_write_end(struct file *file,
31513212
struct address_space *mapping,
31523213
loff_t pos, unsigned len, unsigned copied,
31533214
struct page *page, void *fsdata)
31543215
{
31553216
struct inode *inode = mapping->host;
3156-
loff_t new_i_size;
3157-
unsigned long start, end;
31583217
int write_mode = (int)(unsigned long)fsdata;
31593218

31603219
if (write_mode == FALL_BACK_TO_NONDELALLOC)
@@ -3177,30 +3236,7 @@ static int ext4_da_write_end(struct file *file,
31773236
return -EIO;
31783237
}
31793238

3180-
start = pos & (PAGE_SIZE - 1);
3181-
end = start + copied - 1;
3182-
3183-
/*
3184-
* Since we are holding inode lock, we are sure i_disksize <=
3185-
* i_size. We also know that if i_disksize < i_size, there are
3186-
* delalloc writes pending in the range upto i_size. If the end of
3187-
* the current write is <= i_size, there's no need to touch
3188-
* i_disksize since writeback will push i_disksize upto i_size
3189-
* eventually. If the end of the current write is > i_size and
3190-
* inside an allocated block (ext4_da_should_update_i_disksize()
3191-
* check), we need to update i_disksize here as neither
3192-
* ext4_writepage() nor certain ext4_writepages() paths not
3193-
* allocating blocks update i_disksize.
3194-
*
3195-
* Note that we defer inode dirtying to generic_write_end() /
3196-
* ext4_da_write_inline_data_end().
3197-
*/
3198-
new_i_size = pos + copied;
3199-
if (copied && new_i_size > inode->i_size &&
3200-
ext4_da_should_update_i_disksize(page, end))
3201-
ext4_update_i_disksize(inode, new_i_size);
3202-
3203-
return generic_write_end(file, mapping, pos, len, copied, page, fsdata);
3239+
return ext4_da_do_write_end(mapping, pos, len, copied, page);
32043240
}
32053241

32063242
/*

0 commit comments

Comments
 (0)