Skip to content

Commit 7d16a2c

Browse files
author
Brian Foster
committed
ext4: partial zero eof block on unaligned inode size extension
JIRA: https://issues.redhat.com/browse/RHEL-109217 Conflicts: Minor context difference in setting inode timestamps. commit c7fc036 Author: Brian Foster <bfoster@redhat.com> Date: Thu Sep 19 12:07:40 2024 -0400 ext4: partial zero eof block on unaligned inode size extension Using mapped writes, it's technically possible to expose stale post-eof data on a truncate up operation. Consider the following example: $ xfs_io -fc "pwrite 0 2k" -c "mmap 0 4k" -c "mwrite 2k 2k" \ -c "truncate 8k" -c "pread -v 2k 16" <file> ... 00000800: 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 XXXXXXXXXXXXXXXX ... This shows that the post-eof data written via mwrite lands within EOF after a truncate up. While this is deliberate of the test case, behavior is somewhat unpredictable because writeback does post-eof zeroing, and writeback can occur at any time in the background. For example, an fsync inserted between the mwrite and truncate causes the subsequent read to instead return zeroes. This basically means that there is a race window in this situation between any subsequent extending operation and writeback that dictates whether post-eof data is exposed to the file or zeroed. To prevent this problem, perform partial block zeroing as part of the various inode size extending operations that are susceptible to it. For truncate extension, zero around the original eof similar to how truncate down does partial zeroing of the new eof. For extension via writes and fallocate related operations, zero the newly exposed range of the file to cover any partial zeroing that must occur at the original and new eof blocks. Signed-off-by: Brian Foster <bfoster@redhat.com> Link: https://patch.msgid.link/20240919160741.208162-2-bfoster@redhat.com Signed-off-by: Theodore Ts'o <tytso@mit.edu> Signed-off-by: Brian Foster <bfoster@redhat.com>
1 parent 50ff9eb commit 7d16a2c

File tree

2 files changed

+40
-15
lines changed

2 files changed

+40
-15
lines changed

fs/ext4/extents.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4483,7 +4483,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
44834483
int depth = 0;
44844484
struct ext4_map_blocks map;
44854485
unsigned int credits;
4486-
loff_t epos;
4486+
loff_t epos, old_size = i_size_read(inode);
44874487

44884488
BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
44894489
map.m_lblk = offset;
@@ -4541,6 +4541,11 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
45414541
epos = new_size;
45424542
if (ext4_update_inode_size(inode, epos) & 0x1)
45434543
inode->i_mtime = inode->i_ctime;
4544+
if (epos > old_size) {
4545+
pagecache_isize_extended(inode, old_size, epos);
4546+
ext4_zero_partial_blocks(handle, inode,
4547+
old_size, epos - old_size);
4548+
}
45444549
}
45454550
ret2 = ext4_mark_inode_dirty(handle, inode);
45464551
ext4_update_inode_fsync_trans(handle, inode, 1);

fs/ext4/inode.c

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,8 +1359,10 @@ static int ext4_write_end(struct file *file,
13591359
unlock_page(page);
13601360
put_page(page);
13611361

1362-
if (old_size < pos && !verity)
1362+
if (old_size < pos && !verity) {
13631363
pagecache_isize_extended(inode, old_size, pos);
1364+
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
1365+
}
13641366
/*
13651367
* Don't mark the inode dirty under page lock. First, it unnecessarily
13661368
* makes the holding time of page lock longer. Second, it forces lock
@@ -1473,8 +1475,10 @@ static int ext4_journalled_write_end(struct file *file,
14731475
unlock_page(page);
14741476
put_page(page);
14751477

1476-
if (old_size < pos && !verity)
1478+
if (old_size < pos && !verity) {
14771479
pagecache_isize_extended(inode, old_size, pos);
1480+
ext4_zero_partial_blocks(handle, inode, old_size, pos - old_size);
1481+
}
14781482

14791483
if (size_changed) {
14801484
ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -3154,7 +3158,8 @@ static int ext4_da_do_write_end(struct address_space *mapping,
31543158
struct inode *inode = mapping->host;
31553159
loff_t old_size = inode->i_size;
31563160
bool disksize_changed = false;
3157-
loff_t new_i_size;
3161+
loff_t new_i_size, zero_len = 0;
3162+
handle_t *handle;
31583163

31593164
/*
31603165
* block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
@@ -3192,18 +3197,21 @@ static int ext4_da_do_write_end(struct address_space *mapping,
31923197
unlock_page(page);
31933198
put_page(page);
31943199

3195-
if (old_size < pos)
3200+
if (pos > old_size) {
31963201
pagecache_isize_extended(inode, old_size, pos);
3202+
zero_len = pos - old_size;
3203+
}
31973204

3198-
if (disksize_changed) {
3199-
handle_t *handle;
3205+
if (!disksize_changed && !zero_len)
3206+
return copied;
32003207

3201-
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3202-
if (IS_ERR(handle))
3203-
return PTR_ERR(handle);
3204-
ext4_mark_inode_dirty(handle, inode);
3205-
ext4_journal_stop(handle);
3206-
}
3208+
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3209+
if (IS_ERR(handle))
3210+
return PTR_ERR(handle);
3211+
if (zero_len)
3212+
ext4_zero_partial_blocks(handle, inode, old_size, zero_len);
3213+
ext4_mark_inode_dirty(handle, inode);
3214+
ext4_journal_stop(handle);
32073215

32083216
return copied;
32093217
}
@@ -5642,6 +5650,14 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
56425650
}
56435651

56445652
if (attr->ia_size != inode->i_size) {
5653+
/* attach jbd2 jinode for EOF folio tail zeroing */
5654+
if (attr->ia_size & (inode->i_sb->s_blocksize - 1) ||
5655+
oldsize & (inode->i_sb->s_blocksize - 1)) {
5656+
error = ext4_inode_attach_jinode(inode);
5657+
if (error)
5658+
goto err_out;
5659+
}
5660+
56455661
handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
56465662
if (IS_ERR(handle)) {
56475663
error = PTR_ERR(handle);
@@ -5652,12 +5668,16 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
56525668
orphan = 1;
56535669
}
56545670
/*
5655-
* Update c/mtime on truncate up, ext4_truncate() will
5656-
* update c/mtime in shrink case below
5671+
* Update c/mtime and tail zero the EOF folio on
5672+
* truncate up. ext4_truncate() handles the shrink case
5673+
* below.
56575674
*/
56585675
if (!shrink) {
56595676
inode->i_mtime = current_time(inode);
56605677
inode->i_ctime = inode->i_mtime;
5678+
if (oldsize & (inode->i_sb->s_blocksize - 1))
5679+
ext4_block_truncate_page(handle,
5680+
inode->i_mapping, oldsize);
56615681
}
56625682

56635683
if (shrink)

0 commit comments

Comments
 (0)