Skip to content

Commit 33f5f61

Browse files
author
Ming Lei
committed
block: introduce a dedicated lock for protecting queue elevator updates
JIRA: https://issues.redhat.com/browse/RHEL-112997

commit 1bf70d0
Author: Nilay Shroff <nilay@linux.ibm.com>
Date: Tue Mar 4 15:52:33 2025 +0530

block: introduce a dedicated lock for protecting queue elevator updates

A queue's elevator can be updated either when modifying nr_hw_queues or through the sysfs scheduler attribute. Currently, elevator switching/updating is protected using q->sysfs_lock, but this has led to lockdep splats[1] due to inconsistent lock ordering between q->sysfs_lock and the freeze-lock in multiple block layer call sites. As the scope of q->sysfs_lock is not well-defined, its (mis)use has resulted in numerous lockdep warnings. To address this, introduce a new q->elevator_lock, dedicated specifically for protecting elevator switches/updates. And we'd now use this new q->elevator_lock instead of q->sysfs_lock for protecting elevator switches/updates.

While at it, make elv_iosched_load_module() a static function, as it is only called from elv_iosched_store(). Also, remove redundant parameters from elv_iosched_load_module() function signature.

[1] https://lore.kernel.org/all/67637e70.050a0220.3157ee.000c.GAE@google.com/

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Link: https://lore.kernel.org/r/20250304102551.2533767-5-nilay@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
1 parent c49b088 commit 33f5f61

File tree

7 files changed

+60
-42
lines changed

7 files changed

+60
-42
lines changed

block/blk-core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
429429

430430
refcount_set(&q->refs, 1);
431431
mutex_init(&q->debugfs_mutex);
432+
mutex_init(&q->elevator_lock);
432433
mutex_init(&q->sysfs_lock);
433434
mutex_init(&q->limits_lock);
434435
mutex_init(&q->rq_qos_mutex);

block/blk-mq.c

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4468,7 +4468,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
44684468
unsigned long i, j;
44694469

44704470
/* protect against switching io scheduler */
4471-
mutex_lock(&q->sysfs_lock);
4471+
mutex_lock(&q->elevator_lock);
44724472
for (i = 0; i < set->nr_hw_queues; i++) {
44734473
int old_node;
44744474
int node = blk_mq_get_hctx_node(set, i);
@@ -4501,7 +4501,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
45014501

45024502
xa_for_each_start(&q->hctx_table, j, hctx, j)
45034503
blk_mq_exit_hctx(q, set, hctx, j);
4504-
mutex_unlock(&q->sysfs_lock);
4504+
mutex_unlock(&q->elevator_lock);
45054505

45064506
/* unregister cpuhp callbacks for exited hctxs */
45074507
blk_mq_remove_hw_queues_cpuhp(q);
@@ -4934,10 +4934,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
49344934
if (!qe)
49354935
return false;
49364936

4937-
/* q->elevator needs protection from ->sysfs_lock */
4938-
mutex_lock(&q->sysfs_lock);
4937+
/* Accessing q->elevator needs protection from ->elevator_lock. */
4938+
mutex_lock(&q->elevator_lock);
49394939

4940-
/* the check has to be done with holding sysfs_lock */
49414940
if (!q->elevator) {
49424941
kfree(qe);
49434942
goto unlock;
@@ -4951,7 +4950,7 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
49514950
list_add(&qe->node, head);
49524951
elevator_disable(q);
49534952
unlock:
4954-
mutex_unlock(&q->sysfs_lock);
4953+
mutex_unlock(&q->elevator_lock);
49554954

49564955
return true;
49574956
}
@@ -4981,11 +4980,11 @@ static void blk_mq_elv_switch_back(struct list_head *head,
49814980
list_del(&qe->node);
49824981
kfree(qe);
49834982

4984-
mutex_lock(&q->sysfs_lock);
4983+
mutex_lock(&q->elevator_lock);
49854984
elevator_switch(q, t);
49864985
/* drop the reference acquired in blk_mq_elv_switch_none */
49874986
elevator_put(t);
4988-
mutex_unlock(&q->sysfs_lock);
4987+
mutex_unlock(&q->elevator_lock);
49894988
}
49904989

49914990
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,

block/blk-sysfs.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -699,10 +699,15 @@ static struct attribute *blk_mq_queue_attrs[] = {
699699
* Attributes which are protected with q->sysfs_lock.
700700
*/
701701
&queue_requests_entry.attr,
702-
&elv_iosched_entry.attr,
703702
#ifdef CONFIG_BLK_WBT
704703
&queue_wb_lat_entry.attr,
705704
#endif
705+
/*
706+
* Attributes which require some form of locking other than
707+
* q->sysfs_lock.
708+
*/
709+
&elv_iosched_entry.attr,
710+
706711
/*
707712
* Attributes which don't require locking.
708713
*/
@@ -871,15 +876,19 @@ int blk_register_queue(struct gendisk *disk)
871876
if (ret)
872877
goto out_debugfs_remove;
873878

879+
ret = blk_crypto_sysfs_register(disk);
880+
if (ret)
881+
goto out_unregister_ia_ranges;
882+
883+
mutex_lock(&q->elevator_lock);
874884
if (q->elevator) {
875885
ret = elv_register_queue(q, false);
876-
if (ret)
877-
goto out_unregister_ia_ranges;
886+
if (ret) {
887+
mutex_unlock(&q->elevator_lock);
888+
goto out_crypto_sysfs_unregister;
889+
}
878890
}
879-
880-
ret = blk_crypto_sysfs_register(disk);
881-
if (ret)
882-
goto out_elv_unregister;
891+
mutex_unlock(&q->elevator_lock);
883892

884893
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
885894
wbt_enable_default(disk);
@@ -904,8 +913,8 @@ int blk_register_queue(struct gendisk *disk)
904913

905914
return ret;
906915

907-
out_elv_unregister:
908-
elv_unregister_queue(q);
916+
out_crypto_sysfs_unregister:
917+
blk_crypto_sysfs_unregister(disk);
909918
out_unregister_ia_ranges:
910919
disk_unregister_independent_access_ranges(disk);
911920
out_debugfs_remove:
@@ -951,8 +960,11 @@ void blk_unregister_queue(struct gendisk *disk)
951960
blk_mq_sysfs_unregister(disk);
952961
blk_crypto_sysfs_unregister(disk);
953962

954-
mutex_lock(&q->sysfs_lock);
963+
mutex_lock(&q->elevator_lock);
955964
elv_unregister_queue(q);
965+
mutex_unlock(&q->elevator_lock);
966+
967+
mutex_lock(&q->sysfs_lock);
956968
disk_unregister_independent_access_ranges(disk);
957969
mutex_unlock(&q->sysfs_lock);
958970

block/elevator.c

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
457457
struct elevator_queue *e = q->elevator;
458458
int error;
459459

460-
lockdep_assert_held(&q->sysfs_lock);
460+
lockdep_assert_held(&q->elevator_lock);
461461

462462
error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
463463
if (!error) {
@@ -481,7 +481,7 @@ void elv_unregister_queue(struct request_queue *q)
481481
{
482482
struct elevator_queue *e = q->elevator;
483483

484-
lockdep_assert_held(&q->sysfs_lock);
484+
lockdep_assert_held(&q->elevator_lock);
485485

486486
if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
487487
kobject_uevent(&e->kobj, KOBJ_REMOVE);
@@ -618,7 +618,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
618618
unsigned int memflags;
619619
int ret;
620620

621-
lockdep_assert_held(&q->sysfs_lock);
621+
lockdep_assert_held(&q->elevator_lock);
622622

623623
memflags = blk_mq_freeze_queue(q);
624624
blk_mq_quiesce_queue(q);
@@ -655,7 +655,7 @@ void elevator_disable(struct request_queue *q)
655655
{
656656
unsigned int memflags;
657657

658-
lockdep_assert_held(&q->sysfs_lock);
658+
lockdep_assert_held(&q->elevator_lock);
659659

660660
memflags = blk_mq_freeze_queue(q);
661661
blk_mq_quiesce_queue(q);
@@ -700,28 +700,23 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
700700
return ret;
701701
}
702702

703-
void elv_iosched_load_module(struct gendisk *disk, const char *buf,
704-
size_t count)
703+
static void elv_iosched_load_module(char *elevator_name)
705704
{
706-
char elevator_name[ELV_NAME_MAX];
707705
struct elevator_type *found;
708-
const char *name;
709-
710-
strscpy(elevator_name, buf, sizeof(elevator_name));
711-
name = strstrip(elevator_name);
712706

713707
spin_lock(&elv_list_lock);
714-
found = __elevator_find(name);
708+
found = __elevator_find(elevator_name);
715709
spin_unlock(&elv_list_lock);
716710

717711
if (!found)
718-
request_module("%s-iosched", name);
712+
request_module("%s-iosched", elevator_name);
719713
}
720714

721715
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
722716
size_t count)
723717
{
724718
char elevator_name[ELV_NAME_MAX];
719+
char *name;
725720
int ret;
726721
unsigned int memflags;
727722
struct request_queue *q = disk->queue;
@@ -731,16 +726,18 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
731726
* queue to ensure that the module file can be read when the request
732727
* queue is the one for the device storing the module file.
733728
*/
734-
elv_iosched_load_module(disk, buf, count);
735729
strscpy(elevator_name, buf, sizeof(elevator_name));
730+
name = strstrip(elevator_name);
731+
732+
elv_iosched_load_module(name);
736733

737-
mutex_lock(&q->sysfs_lock);
738734
memflags = blk_mq_freeze_queue(q);
739-
ret = elevator_change(q, strstrip(elevator_name));
735+
mutex_lock(&q->elevator_lock);
736+
ret = elevator_change(q, name);
740737
if (!ret)
741738
ret = count;
739+
mutex_unlock(&q->elevator_lock);
742740
blk_mq_unfreeze_queue(q, memflags);
743-
mutex_unlock(&q->sysfs_lock);
744741
return ret;
745742
}
746743

@@ -751,7 +748,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
751748
struct elevator_type *cur = NULL, *e;
752749
int len = 0;
753750

754-
mutex_lock(&q->sysfs_lock);
751+
mutex_lock(&q->elevator_lock);
755752
if (!q->elevator) {
756753
len += sprintf(name+len, "[none] ");
757754
} else {
@@ -769,7 +766,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
769766
spin_unlock(&elv_list_lock);
770767

771768
len += sprintf(name+len, "\n");
772-
mutex_unlock(&q->sysfs_lock);
769+
mutex_unlock(&q->elevator_lock);
773770

774771
return len;
775772
}

block/elevator.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,6 @@ extern void elv_unregister(struct elevator_type *);
159159
* io scheduler sysfs switching
160160
*/
161161
ssize_t elv_iosched_show(struct gendisk *disk, char *page);
162-
void elv_iosched_load_module(struct gendisk *disk, const char *page,
163-
size_t count);
164162
ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
165163

166164
extern bool elv_bio_merge_ok(struct request *, struct bio *);

block/genhd.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -565,8 +565,11 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
565565
if (disk->major == BLOCK_EXT_MAJOR)
566566
blk_free_ext_minor(disk->first_minor);
567567
out_exit_elevator:
568-
if (disk->queue->elevator)
568+
if (disk->queue->elevator) {
569+
mutex_lock(&disk->queue->elevator_lock);
569570
elevator_exit(disk->queue);
571+
mutex_unlock(&disk->queue->elevator_lock);
572+
}
570573
return ret;
571574
}
572575
EXPORT_SYMBOL_GPL(add_disk_fwnode);
@@ -735,9 +738,9 @@ void del_gendisk(struct gendisk *disk)
735738

736739
blk_mq_quiesce_queue(q);
737740
if (q->elevator) {
738-
mutex_lock(&q->sysfs_lock);
741+
mutex_lock(&q->elevator_lock);
739742
elevator_exit(q);
740-
mutex_unlock(&q->sysfs_lock);
743+
mutex_unlock(&q->elevator_lock);
741744
}
742745
rq_qos_exit(q);
743746
blk_mq_unquiesce_queue(q);

include/linux/blkdev.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,14 @@ struct request_queue {
579579
struct blk_flush_queue *fq;
580580
struct list_head flush_list;
581581

582+
/*
583+
* Protects against I/O scheduler switching, specifically when
584+
* updating q->elevator. To ensure proper locking order during
585+
* an elevator update, first freeze the queue, then acquire
586+
* ->elevator_lock.
587+
*/
588+
struct mutex elevator_lock;
589+
582590
struct mutex sysfs_lock;
583591
struct mutex limits_lock;
584592

0 commit comments

Comments
 (0)