Skip to content

Commit 43ad2cf

Browse files
author
Herton R. Krzesinski
committed
Merge: statx: add direct I/O alignment information
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/1812

Bugzilla: https://bugzilla.redhat.com/2150284
Tested: xfstests
Upstream Status: upstream

Implement STATX_DIOALIGN for statx.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Approved-by: Ming Lei <ming.lei@redhat.com>
Approved-by: Carlos Maiolino <cmaiolino@redhat.com>
Approved-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
2 parents 4f557d9 + 8ef18f9 commit 43ad2cf

File tree

11 files changed

+125
-13
lines changed

11 files changed

+125
-13
lines changed

block/bdev.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <linux/namei.h>
2727
#include <linux/part_stat.h>
2828
#include <linux/uaccess.h>
29+
#include <linux/stat.h>
2930
#include "../fs/internal.h"
3031
#include "blk.h"
3132

@@ -1062,3 +1063,25 @@ void sync_bdevs(bool wait)
10621063
spin_unlock(&blockdev_superblock->s_inode_list_lock);
10631064
iput(old_inode);
10641065
}
1066+
1067+
/*
1068+
* Handle STATX_DIOALIGN for block devices.
1069+
*
1070+
* Note that the inode passed to this is the inode of a block device node file,
1071+
* not the block device's internal inode. Therefore it is *not* valid to use
1072+
* I_BDEV() here; the block device has to be looked up by i_rdev instead.
1073+
*/
1074+
void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
1075+
{
1076+
struct block_device *bdev;
1077+
1078+
bdev = blkdev_get_no_open(inode->i_rdev);
1079+
if (!bdev)
1080+
return;
1081+
1082+
stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
1083+
stat->dio_offset_align = bdev_logical_block_size(bdev);
1084+
stat->result_mask |= STATX_DIOALIGN;
1085+
1086+
blkdev_put_no_open(bdev);
1087+
}

fs/ext4/ext4.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3013,6 +3013,7 @@ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
30133013
extern int ext4_write_inode(struct inode *, struct writeback_control *);
30143014
extern int ext4_setattr(struct user_namespace *, struct dentry *,
30153015
struct iattr *);
3016+
extern u32 ext4_dio_alignment(struct inode *inode);
30163017
extern int ext4_getattr(struct user_namespace *, const struct path *,
30173018
struct kstat *, u32, unsigned int);
30183019
extern void ext4_evict_inode(struct inode *);

fs/ext4/file.c

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,34 @@
3636
#include "acl.h"
3737
#include "truncate.h"
3838

39-
static bool ext4_dio_supported(struct inode *inode)
39+
/*
40+
* Returns %true if the given DIO request should be attempted with DIO, or
41+
* %false if it should fall back to buffered I/O.
42+
*
43+
* DIO isn't well specified; when it's unsupported (either due to the request
44+
* being misaligned, or due to the file not supporting DIO at all), filesystems
45+
* either fall back to buffered I/O or return EINVAL. For files that don't use
46+
* any special features like encryption or verity, ext4 has traditionally
47+
* returned EINVAL for misaligned DIO. iomap_dio_rw() uses this convention too.
48+
* In this case, we should attempt the DIO, *not* fall back to buffered I/O.
49+
*
50+
* In contrast, in cases where DIO is unsupported due to ext4 features, ext4
51+
* traditionally falls back to buffered I/O.
52+
*
53+
* This function implements the traditional ext4 behavior in all these cases.
54+
*/
55+
static bool ext4_should_use_dio(struct kiocb *iocb, struct iov_iter *iter)
4056
{
41-
if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
42-
return false;
43-
if (fsverity_active(inode))
44-
return false;
45-
if (ext4_should_journal_data(inode))
46-
return false;
47-
if (ext4_has_inline_data(inode))
57+
struct inode *inode = file_inode(iocb->ki_filp);
58+
u32 dio_align = ext4_dio_alignment(inode);
59+
60+
if (dio_align == 0)
4861
return false;
49-
return true;
62+
63+
if (dio_align == 1)
64+
return true;
65+
66+
return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align);
5067
}
5168

5269
static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -61,7 +78,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
6178
inode_lock_shared(inode);
6279
}
6380

64-
if (!ext4_dio_supported(inode)) {
81+
if (!ext4_should_use_dio(iocb, to)) {
6582
inode_unlock_shared(inode);
6683
/*
6784
* Fallback to buffered I/O if the operation being performed on
@@ -509,7 +526,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
509526
}
510527

511528
/* Fallback to buffered I/O if the inode does not support direct I/O. */
512-
if (!ext4_dio_supported(inode)) {
529+
if (!ext4_should_use_dio(iocb, from)) {
513530
if (ilock_shared)
514531
inode_unlock_shared(inode);
515532
else

fs/ext4/inode.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5515,6 +5515,19 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
55155515
return error;
55165516
}
55175517

5518+
u32 ext4_dio_alignment(struct inode *inode)
5519+
{
5520+
if (fsverity_active(inode))
5521+
return 0;
5522+
if (ext4_should_journal_data(inode))
5523+
return 0;
5524+
if (ext4_has_inline_data(inode))
5525+
return 0;
5526+
if (IS_ENCRYPTED(inode))
5527+
return 0;
5528+
return 1; /* use the iomap defaults */
5529+
}
5530+
55185531
int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
55195532
struct kstat *stat, u32 request_mask, unsigned int query_flags)
55205533
{
@@ -5530,6 +5543,29 @@ int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
55305543
stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
55315544
}
55325545

5546+
/*
5547+
* Return the DIO alignment restrictions if requested. We only return
5548+
* this information when requested, since on encrypted files it might
5549+
* take a fair bit of work to get if the file wasn't opened recently.
5550+
* (RHEL9: this comment isn't exactly correct on rhel9, because there
5551+
* is no DIO support in fscrypt. But we'll leave the code as is.)
5552+
*/
5553+
if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
5554+
u32 dio_align = ext4_dio_alignment(inode);
5555+
5556+
stat->result_mask |= STATX_DIOALIGN;
5557+
if (dio_align == 1) {
5558+
struct block_device *bdev = inode->i_sb->s_bdev;
5559+
5560+
/* iomap defaults */
5561+
stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
5562+
stat->dio_offset_align = bdev_logical_block_size(bdev);
5563+
} else {
5564+
stat->dio_mem_align = dio_align;
5565+
stat->dio_offset_align = dio_align;
5566+
}
5567+
}
5568+
55335569
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
55345570
if (flags & EXT4_APPEND_FL)
55355571
stat->attributes |= STATX_ATTR_APPEND;

fs/stat.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Copyright (C) 1991, 1992 Linus Torvalds
66
*/
77

8+
#include <linux/blkdev.h>
89
#include <linux/export.h>
910
#include <linux/mm.h>
1011
#include <linux/errno.h>
@@ -212,11 +213,22 @@ static int vfs_statx(int dfd, struct filename *filename, int flags,
212213
goto out;
213214

214215
error = vfs_getattr(&path, stat, request_mask, flags);
216+
215217
stat->mnt_id = real_mount(path.mnt)->mnt_id;
216218
stat->result_mask |= STATX_MNT_ID;
219+
217220
if (path.mnt->mnt_root == path.dentry)
218221
stat->attributes |= STATX_ATTR_MOUNT_ROOT;
219222
stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
223+
224+
/* Handle STATX_DIOALIGN for block devices. */
225+
if (request_mask & STATX_DIOALIGN) {
226+
struct inode *inode = d_backing_inode(path.dentry);
227+
228+
if (S_ISBLK(inode->i_mode))
229+
bdev_statx_dioalign(inode, stat);
230+
}
231+
220232
path_put(&path);
221233
if (retry_estale(error, lookup_flags)) {
222234
lookup_flags |= LOOKUP_REVAL;
@@ -594,6 +606,8 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
594606
tmp.stx_dev_major = MAJOR(stat->dev);
595607
tmp.stx_dev_minor = MINOR(stat->dev);
596608
tmp.stx_mnt_id = stat->mnt_id;
609+
tmp.stx_dio_mem_align = stat->dio_mem_align;
610+
tmp.stx_dio_offset_align = stat->dio_offset_align;
597611

598612
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
599613
}

fs/xfs/xfs_iops.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,16 @@ xfs_vn_getattr(
604604
stat->blksize = BLKDEV_IOSIZE;
605605
stat->rdev = inode->i_rdev;
606606
break;
607+
case S_IFREG:
608+
if (request_mask & STATX_DIOALIGN) {
609+
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
610+
struct block_device *bdev = target->bt_bdev;
611+
612+
stat->result_mask |= STATX_DIOALIGN;
613+
stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
614+
stat->dio_offset_align = bdev_logical_block_size(bdev);
615+
}
616+
fallthrough;
607617
default:
608618
stat->blksize = xfs_stat_blksize(ip);
609619
stat->rdev = 0;

include/linux/blkdev.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,6 +1521,7 @@ void invalidate_bdev(struct block_device *bdev);
15211521
int sync_blockdev(struct block_device *bdev);
15221522
int sync_blockdev_nowait(struct block_device *bdev);
15231523
void sync_bdevs(bool wait);
1524+
void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
15241525
void printk_all_partitions(void);
15251526
#else
15261527
static inline void invalidate_bdev(struct block_device *bdev)
@@ -1537,6 +1538,9 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
15371538
static inline void sync_bdevs(bool wait)
15381539
{
15391540
}
1541+
static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
1542+
{
1543+
}
15401544
static inline void printk_all_partitions(void)
15411545
{
15421546
}

include/linux/stat.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ struct kstat {
4646
struct timespec64 btime; /* File creation time */
4747
u64 blocks;
4848
u64 mnt_id;
49+
u32 dio_mem_align;
50+
u32 dio_offset_align;
4951
};
5052

5153
#endif

include/uapi/linux/stat.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ struct statx {
124124
__u32 stx_dev_minor;
125125
/* 0x90 */
126126
__u64 stx_mnt_id;
127-
__u64 __spare2;
127+
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
128+
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
128129
/* 0xa0 */
129130
__u64 __spare3[12]; /* Spare space for future expansion */
130131
/* 0x100 */
@@ -152,6 +153,7 @@ struct statx {
152153
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
153154
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
154155
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
156+
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
155157

156158
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
157159

tools/include/uapi/linux/stat.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ struct statx {
124124
__u32 stx_dev_minor;
125125
/* 0x90 */
126126
__u64 stx_mnt_id;
127-
__u64 __spare2;
127+
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
128+
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
128129
/* 0xa0 */
129130
__u64 __spare3[12]; /* Spare space for future expansion */
130131
/* 0x100 */
@@ -152,6 +153,7 @@ struct statx {
152153
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
153154
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
154155
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
156+
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
155157

156158
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
157159

0 commit comments

Comments
 (0)