Skip to content

Commit ca3d643

Browse files
Christoph Hellwig, cmaiolino
authored and committed
xfs: cache open zone in inode->i_private
The MRU cache for open zones is unfortunately still not ideal, as it can time out pretty easily when doing heavy I/O to hard disks using up most or all open zones. One option would be to just increase the timeout, but while looking into that I realized we're just better off caching it indefinitely as there is no real downside to that once we don't hold a reference to the cached open zone.

So switch the open zone to RCU freeing, and then stash the last used open zone into inode->i_private. This helps to significantly reduce fragmentation by keeping I/O localized to zones for workloads that write using many open files to HDD.

Fixes: 4e4d520 ("xfs: add the zoned space allocator")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Tested-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
1 parent a8c861f commit ca3d643

File tree

4 files changed

+53
-85
lines changed

4 files changed

+53
-85
lines changed

fs/xfs/xfs_mount.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,6 @@ typedef struct xfs_mount {
236236
bool m_update_sb; /* sb needs update in mount */
237237
unsigned int m_max_open_zones;
238238
unsigned int m_zonegc_low_space;
239-
struct xfs_mru_cache *m_zone_cache; /* Inode to open zone cache */
240239

241240
/* max_atomic_write mount option value */
242241
unsigned long long m_awu_max_bytes;

fs/xfs/xfs_super.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,12 @@ xfs_fs_evict_inode(
786786

787787
truncate_inode_pages_final(&inode->i_data);
788788
clear_inode(inode);
789+
790+
if (IS_ENABLED(CONFIG_XFS_RT) &&
791+
S_ISREG(inode->i_mode) && inode->i_private) {
792+
xfs_open_zone_put(inode->i_private);
793+
inode->i_private = NULL;
794+
}
789795
}
790796

791797
static void

fs/xfs/xfs_zone_alloc.c

Lines changed: 45 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,22 @@
2626
#include "xfs_trace.h"
2727
#include "xfs_mru_cache.h"
2828

29+
static void
30+
xfs_open_zone_free_rcu(
31+
struct callback_head *cb)
32+
{
33+
struct xfs_open_zone *oz = container_of(cb, typeof(*oz), oz_rcu);
34+
35+
xfs_rtgroup_rele(oz->oz_rtg);
36+
kfree(oz);
37+
}
38+
2939
void
3040
xfs_open_zone_put(
3141
struct xfs_open_zone *oz)
3242
{
33-
if (atomic_dec_and_test(&oz->oz_ref)) {
34-
xfs_rtgroup_rele(oz->oz_rtg);
35-
kfree(oz);
36-
}
43+
if (atomic_dec_and_test(&oz->oz_ref))
44+
call_rcu(&oz->oz_rcu, xfs_open_zone_free_rcu);
3745
}
3846

3947
static inline uint32_t
@@ -756,98 +764,55 @@ xfs_mark_rtg_boundary(
756764
ioend->io_flags |= IOMAP_IOEND_BOUNDARY;
757765
}
758766

759-
/*
760-
* Cache the last zone written to for an inode so that it is considered first
761-
* for subsequent writes.
762-
*/
763-
struct xfs_zone_cache_item {
764-
struct xfs_mru_cache_elem mru;
765-
struct xfs_open_zone *oz;
766-
};
767-
768-
static inline struct xfs_zone_cache_item *
769-
xfs_zone_cache_item(struct xfs_mru_cache_elem *mru)
770-
{
771-
return container_of(mru, struct xfs_zone_cache_item, mru);
772-
}
773-
774-
static void
775-
xfs_zone_cache_free_func(
776-
void *data,
777-
struct xfs_mru_cache_elem *mru)
778-
{
779-
struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru);
780-
781-
xfs_open_zone_put(item->oz);
782-
kfree(item);
783-
}
784-
785767
/*
786768
* Check if we have a cached last open zone available for the inode and
787769
* if yes return a reference to it.
788770
*/
789771
static struct xfs_open_zone *
790-
xfs_cached_zone(
791-
struct xfs_mount *mp,
792-
struct xfs_inode *ip)
772+
xfs_get_cached_zone(
773+
struct xfs_inode *ip)
793774
{
794-
struct xfs_mru_cache_elem *mru;
795-
struct xfs_open_zone *oz;
775+
struct xfs_open_zone *oz;
796776

797-
mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
798-
if (!mru)
799-
return NULL;
800-
oz = xfs_zone_cache_item(mru)->oz;
777+
rcu_read_lock();
778+
oz = VFS_I(ip)->i_private;
801779
if (oz) {
802780
/*
803781
* GC only steals open zones at mount time, so no GC zones
804782
* should end up in the cache.
805783
*/
806784
ASSERT(!oz->oz_is_gc);
807-
ASSERT(atomic_read(&oz->oz_ref) > 0);
808-
atomic_inc(&oz->oz_ref);
785+
if (!atomic_inc_not_zero(&oz->oz_ref))
786+
oz = NULL;
809787
}
810-
xfs_mru_cache_done(mp->m_zone_cache);
788+
rcu_read_unlock();
789+
811790
return oz;
812791
}
813792

814793
/*
815-
* Update the last used zone cache for a given inode.
794+
* Stash our zone in the inode so that it is reused for future allocations.
816795
*
817-
* The caller must have a reference on the open zone.
796+
* The open_zone structure will be pinned until either the inode is freed or
797+
* until the cached open zone is replaced with a different one because the
798+
* current one was full when we tried to use it. This means we keep any
799+
* open zone around forever as long as any inode that used it for the last
800+
* write is cached, which slightly increases the memory use of cached inodes
801+
* that were ever written to, but significantly simplifies the cached zone
802+
* lookup. Because the open_zone is clearly marked as full when all data
803+
* in the underlying RTG was written, the caching is always safe.
818804
*/
819805
static void
820-
xfs_zone_cache_create_association(
821-
struct xfs_inode *ip,
822-
struct xfs_open_zone *oz)
806+
xfs_set_cached_zone(
807+
struct xfs_inode *ip,
808+
struct xfs_open_zone *oz)
823809
{
824-
struct xfs_mount *mp = ip->i_mount;
825-
struct xfs_zone_cache_item *item = NULL;
826-
struct xfs_mru_cache_elem *mru;
810+
struct xfs_open_zone *old_oz;
827811

828-
ASSERT(atomic_read(&oz->oz_ref) > 0);
829812
atomic_inc(&oz->oz_ref);
830-
831-
mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
832-
if (mru) {
833-
/*
834-
* If we have an association already, update it to point to the
835-
* new zone.
836-
*/
837-
item = xfs_zone_cache_item(mru);
838-
xfs_open_zone_put(item->oz);
839-
item->oz = oz;
840-
xfs_mru_cache_done(mp->m_zone_cache);
841-
return;
842-
}
843-
844-
item = kmalloc(sizeof(*item), GFP_KERNEL);
845-
if (!item) {
846-
xfs_open_zone_put(oz);
847-
return;
848-
}
849-
item->oz = oz;
850-
xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
813+
old_oz = xchg(&VFS_I(ip)->i_private, oz);
814+
if (old_oz)
815+
xfs_open_zone_put(old_oz);
851816
}
852817

853818
static void
@@ -891,15 +856,14 @@ xfs_zone_alloc_and_submit(
891856
* the inode is still associated with a zone and use that if so.
892857
*/
893858
if (!*oz)
894-
*oz = xfs_cached_zone(mp, ip);
859+
*oz = xfs_get_cached_zone(ip);
895860

896861
if (!*oz) {
897862
select_zone:
898863
*oz = xfs_select_zone(mp, write_hint, pack_tight);
899864
if (!*oz)
900865
goto out_error;
901-
902-
xfs_zone_cache_create_association(ip, *oz);
866+
xfs_set_cached_zone(ip, *oz);
903867
}
904868

905869
alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size),
@@ -977,6 +941,12 @@ xfs_free_open_zones(
977941
xfs_open_zone_put(oz);
978942
}
979943
spin_unlock(&zi->zi_open_zones_lock);
944+
945+
/*
946+
* Wait for all open zones to be freed so that they drop the group
947+
* references:
948+
*/
949+
rcu_barrier();
980950
}
981951

982952
struct xfs_init_zones {
@@ -1290,14 +1260,6 @@ xfs_mount_zones(
12901260
error = xfs_zone_gc_mount(mp);
12911261
if (error)
12921262
goto out_free_zone_info;
1293-
1294-
/*
1295-
* Set up a mru cache to track inode to open zone for data placement
1296-
* purposes. The magic values for group count and life time is the
1297-
* same as the defaults for file streams, which seems sane enough.
1298-
*/
1299-
xfs_mru_cache_create(&mp->m_zone_cache, mp,
1300-
5000, 10, xfs_zone_cache_free_func);
13011263
return 0;
13021264

13031265
out_free_zone_info:
@@ -1311,5 +1273,4 @@ xfs_unmount_zones(
13111273
{
13121274
xfs_zone_gc_unmount(mp);
13131275
xfs_free_zone_info(mp->m_zone_info);
1314-
xfs_mru_cache_destroy(mp->m_zone_cache);
13151276
}

fs/xfs/xfs_zone_priv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ struct xfs_open_zone {
4444
* the life time of an open zone.
4545
*/
4646
struct xfs_rtgroup *oz_rtg;
47+
48+
struct rcu_head oz_rcu;
4749
};
4850

4951
/*

0 commit comments

Comments
 (0)