Skip to content

Commit f323bcd

Browse files
committed
xfs,iomap: move delalloc punching to iomap
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2155605 Tested: With xfstests and bz reproducer Conflicts: - Context conflict due the lack of iomap_read_folio() Because that's what Christoph wants for this error handling path only XFS uses. It requires a new iomap export for handling errors over delalloc ranges. This is basically the XFS code as it stands, but even though Christoph wants this as iomap functionality, we still have to call it from the filesystem specific ->iomap_end callback, and call into the iomap code with yet another filesystem specific callback to punch the delalloc extent within the defined ranges. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> (cherry picked from commit 9c7babf) Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
1 parent 51ea66c commit f323bcd

File tree

3 files changed

+72
-39
lines changed

3 files changed

+72
-39
lines changed

fs/iomap/buffered-io.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,66 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
810810
}
811811
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
812812

813+
/*
814+
* When a short write occurs, the filesystem may need to remove reserved space
815+
* that was allocated in ->iomap_begin from it's ->iomap_end method. For
816+
* filesystems that use delayed allocation, we need to punch out delalloc
817+
* extents from the range that are not dirty in the page cache. As the write can
818+
* race with page faults, there can be dirty pages over the delalloc extent
819+
* outside the range of a short write but still within the delalloc extent
820+
* allocated for this iomap.
821+
*
822+
* This function uses [start_byte, end_byte) intervals (i.e. open ended) to
823+
* simplify range iterations, but converts them back to {offset,len} tuples for
824+
* the punch callback.
825+
*/
826+
int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
827+
struct iomap *iomap, loff_t pos, loff_t length,
828+
ssize_t written,
829+
int (*punch)(struct inode *inode, loff_t pos, loff_t length))
830+
{
831+
loff_t start_byte;
832+
loff_t end_byte;
833+
int blocksize = i_blocksize(inode);
834+
int error = 0;
835+
836+
if (iomap->type != IOMAP_DELALLOC)
837+
return 0;
838+
839+
/* If we didn't reserve the blocks, we're not allowed to punch them. */
840+
if (!(iomap->flags & IOMAP_F_NEW))
841+
return 0;
842+
843+
/*
844+
* start_byte refers to the first unused block after a short write. If
845+
* nothing was written, round offset down to point at the first block in
846+
* the range.
847+
*/
848+
if (unlikely(!written))
849+
start_byte = round_down(pos, blocksize);
850+
else
851+
start_byte = round_up(pos + written, blocksize);
852+
end_byte = round_up(pos + length, blocksize);
853+
854+
/* Nothing to do if we've written the entire delalloc extent */
855+
if (start_byte >= end_byte)
856+
return 0;
857+
858+
/*
859+
* Lock the mapping to avoid races with page faults re-instantiating
860+
* folios and dirtying them via ->page_mkwrite between the page cache
861+
* truncation and the delalloc extent removal. Failing to do this can
862+
* leave dirty pages with no space reservation in the cache.
863+
*/
864+
filemap_invalidate_lock(inode->i_mapping);
865+
truncate_pagecache_range(inode, start_byte, end_byte - 1);
866+
error = punch(inode, start_byte, end_byte - start_byte);
867+
filemap_invalidate_unlock(inode->i_mapping);
868+
869+
return error;
870+
}
871+
EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
872+
813873
static loff_t iomap_unshare_iter(struct iomap_iter *iter)
814874
{
815875
struct iomap *iomap = &iter->iomap;

fs/xfs/xfs_iomap.c

Lines changed: 8 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,12 +1078,12 @@ xfs_buffered_write_iomap_begin(
10781078
static int
10791079
xfs_buffered_write_delalloc_punch(
10801080
struct inode *inode,
1081-
loff_t start_byte,
1082-
loff_t end_byte)
1081+
loff_t offset,
1082+
loff_t length)
10831083
{
10841084
struct xfs_mount *mp = XFS_M(inode->i_sb);
1085-
xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte);
1086-
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte);
1085+
xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset);
1086+
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length);
10871087

10881088
return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb,
10891089
end_fsb - start_fsb);
@@ -1098,13 +1098,9 @@ xfs_buffered_write_iomap_end(
10981098
unsigned flags,
10991099
struct iomap *iomap)
11001100
{
1101-
struct xfs_mount *mp = XFS_M(inode->i_sb);
1102-
loff_t start_byte;
1103-
loff_t end_byte;
1104-
int error = 0;
11051101

1106-
if (iomap->type != IOMAP_DELALLOC)
1107-
return 0;
1102+
struct xfs_mount *mp = XFS_M(inode->i_sb);
1103+
int error;
11081104

11091105
/*
11101106
* Behave as if the write failed if drop writes is enabled. Set the NEW
@@ -1115,35 +1111,8 @@ xfs_buffered_write_iomap_end(
11151111
written = 0;
11161112
}
11171113

1118-
/* If we didn't reserve the blocks, we're not allowed to punch them. */
1119-
if (!(iomap->flags & IOMAP_F_NEW))
1120-
return 0;
1121-
1122-
/*
1123-
* start_fsb refers to the first unused block after a short write. If
1124-
* nothing was written, round offset down to point at the first block in
1125-
* the range.
1126-
*/
1127-
if (unlikely(!written))
1128-
start_byte = round_down(offset, mp->m_sb.sb_blocksize);
1129-
else
1130-
start_byte = round_up(offset + written, mp->m_sb.sb_blocksize);
1131-
end_byte = round_up(offset + length, mp->m_sb.sb_blocksize);
1132-
1133-
/* Nothing to do if we've written the entire delalloc extent */
1134-
if (start_byte >= end_byte)
1135-
return 0;
1136-
1137-
/*
1138-
* Lock the mapping to avoid races with page faults re-instantiating
1139-
* folios and dirtying them via ->page_mkwrite between the page cache
1140-
* truncation and the delalloc extent removal. Failing to do this can
1141-
* leave dirty pages with no space reservation in the cache.
1142-
*/
1143-
filemap_invalidate_lock(inode->i_mapping);
1144-
truncate_pagecache_range(inode, start_byte, end_byte - 1);
1145-
error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte);
1146-
filemap_invalidate_unlock(inode->i_mapping);
1114+
error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
1115+
length, written, &xfs_buffered_write_delalloc_punch);
11471116
if (error && !xfs_is_shutdown(mp)) {
11481117
xfs_alert(mp, "%s: unable to clean up ino 0x%llx",
11491118
__func__, XFS_I(inode)->i_ino);

include/linux/iomap.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
221221
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
222222
const struct iomap_ops *ops);
223223
int iomap_readpage(struct page *page, const struct iomap_ops *ops);
224+
int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
225+
struct iomap *iomap, loff_t pos, loff_t length, ssize_t written,
226+
int (*punch)(struct inode *inode, loff_t pos, loff_t length));
227+
224228
void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
225229
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
226230
int iomap_releasepage(struct page *page, gfp_t gfp_mask);

0 commit comments

Comments
 (0)