Skip to content

Commit 91129fe

Browse files
author
Ming Lei
committed
block: remove q->sysfs_lock for attributes which don't need it
JIRA: https://issues.redhat.com/browse/RHEL-112997 commit d23977f Author: Nilay Shroff <nilay@linux.ibm.com> Date: Tue Mar 4 15:52:32 2025 +0530 block: remove q->sysfs_lock for attributes which don't need it There're few sysfs attributes in block layer which don't really need acquiring q->sysfs_lock while accessing it. The reason being, reading/ writing a value from/to such attributes are either atomic or could be easily protected using READ_ONCE()/WRITE_ONCE(). Moreover, sysfs attributes are inherently protected with sysfs/kernfs internal locking. So this change help segregate all existing sysfs attributes for which we could avoid acquiring q->sysfs_lock. For all read-only attributes we removed the q->sysfs_lock from show method of such attributes. In case attribute is read/write then we removed the q->sysfs_lock from both show and store methods of these attributes. We audited all block sysfs attributes and found following list of attributes which shouldn't require q->sysfs_lock protection: 1. io_poll: Write to this attribute is ignored. So, we don't need q->sysfs_lock. 2. io_poll_delay: Write to this attribute is NOP, so we don't need q->sysfs_lock. 3. io_timeout: Write to this attribute updates q->rq_timeout and read of this attribute returns the value stored in q->rq_timeout Moreover, the q->rq_timeout is set only once when we init the queue (under blk_mq_ init_allocated_queue()) even before disk is added. So that means that we don't need to protect it with q->sysfs_lock. As this attribute is not directly correlated with anything else simply using READ_ONCE/WRITE_ONCE should be enough. 4. nomerges: Write to this attribute file updates two q->flags : QUEUE_FLAG_ NOMERGES and QUEUE_FLAG_NOXMERGES. These flags are accessed during bio-merge which anyways doesn't run with q->sysfs_lock held. Moreover, the q->flags are updated/accessed with bitops which are atomic. So, protecting it with q->sysfs_lock is not necessary. 5. rq_affinity: Write to this attribute file makes atomic updates to q->flags: QUEUE_FLAG_SAME_COMP and QUEUE_FLAG_SAME_FORCE. These flags are also accessed from blk_mq_complete_need_ipi() using test_bit macro. As read/write to q->flags uses bitops which are atomic, protecting it with q->stsys_lock is not necessary. 6. nr_zones: Write to this attribute happens in the driver probe method (except nvme) before disk is added and outside of q->sysfs_lock or any other lock. Moreover nr_zones is defined as "unsigned int" and so reading this attribute, even when it's simultaneously being updated on other cpu, should not return torn value on any architecture supported by linux. So we can avoid using q->sysfs_lock or any other lock/ protection while reading this attribute. 7. discard_zeroes_data: Reading of this attribute always returns 0, so we don't require holding q->sysfs_lock. 8. write_same_max_bytes Reading of this attribute always returns 0, so we don't require holding q->sysfs_lock. Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Hannes Reinecke <hare@suse.de> Reviewed-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Nilay Shroff <nilay@linux.ibm.com> Link: https://lore.kernel.org/r/20250304102551.2533767-4-nilay@linux.ibm.com Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Ming Lei <ming.lei@redhat.com>
1 parent 9555790 commit 91129fe

File tree

2 files changed

+29
-54
lines changed

2 files changed

+29
-54
lines changed

block/blk-settings.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
2323
{
24-
q->rq_timeout = timeout;
24+
WRITE_ONCE(q->rq_timeout, timeout);
2525
}
2626
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
2727

block/blk-sysfs.c

Lines changed: 28 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -172,12 +172,7 @@ QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)
172172
#define QUEUE_SYSFS_SHOW_CONST(_name, _val) \
173173
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
174174
{ \
175-
ssize_t ret; \
176-
\
177-
mutex_lock(&disk->queue->sysfs_lock); \
178-
ret = sysfs_emit(page, "%d\n", _val); \
179-
mutex_unlock(&disk->queue->sysfs_lock); \
180-
return ret; \
175+
return sysfs_emit(page, "%d\n", _val); \
181176
}
182177

183178
/* deprecated fields */
@@ -266,17 +261,11 @@ QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
266261

267262
static ssize_t queue_poll_show(struct gendisk *disk, char *page)
268263
{
269-
ssize_t ret;
264+
if (queue_is_mq(disk->queue))
265+
return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));
270266

271-
mutex_lock(&disk->queue->sysfs_lock);
272-
if (queue_is_mq(disk->queue)) {
273-
ret = sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));
274-
} else {
275-
ret = sysfs_emit(page, "%u\n",
267+
return sysfs_emit(page, "%u\n",
276268
!!(disk->queue->limits.features & BLK_FEAT_POLL));
277-
}
278-
mutex_unlock(&disk->queue->sysfs_lock);
279-
return ret;
280269
}
281270

282271
static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
@@ -288,12 +277,7 @@ static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
288277

289278
static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
290279
{
291-
ssize_t ret;
292-
293-
mutex_lock(&disk->queue->sysfs_lock);
294-
ret = queue_var_show(disk_nr_zones(disk), page);
295-
mutex_unlock(&disk->queue->sysfs_lock);
296-
return ret;
280+
return queue_var_show(disk_nr_zones(disk), page);
297281
}
298282

299283
static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
@@ -326,13 +310,8 @@ static ssize_t queue_iostats_passthrough_store(struct gendisk *disk,
326310
}
327311
static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
328312
{
329-
ssize_t ret;
330-
331-
mutex_lock(&disk->queue->sysfs_lock);
332-
ret = queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
313+
return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
333314
blk_queue_noxmerges(disk->queue), page);
334-
mutex_unlock(&disk->queue->sysfs_lock);
335-
return ret;
336315
}
337316

338317
static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
@@ -346,7 +325,6 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
346325
if (ret < 0)
347326
return ret;
348327

349-
mutex_lock(&q->sysfs_lock);
350328
memflags = blk_mq_freeze_queue(q);
351329
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
352330
blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
@@ -355,22 +333,16 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
355333
else if (nm)
356334
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
357335
blk_mq_unfreeze_queue(q, memflags);
358-
mutex_unlock(&q->sysfs_lock);
359336

360337
return ret;
361338
}
362339

363340
static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
364341
{
365-
ssize_t ret;
366-
bool set, force;
342+
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
343+
bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
367344

368-
mutex_lock(&disk->queue->sysfs_lock);
369-
set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
370-
force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
371-
ret = queue_var_show(set << force, page);
372-
mutex_unlock(&disk->queue->sysfs_lock);
373-
return ret;
345+
return queue_var_show(set << force, page);
374346
}
375347

376348
static ssize_t
@@ -386,7 +358,12 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
386358
if (ret < 0)
387359
return ret;
388360

389-
mutex_lock(&q->sysfs_lock);
361+
/*
362+
* Here we update two queue flags each using atomic bitops, although
363+
* updating two flags isn't atomic it should be harmless as those flags
364+
* are accessed individually using atomic test_bit operation. So we
365+
* don't grab any lock while updating these flags.
366+
*/
390367
memflags = blk_mq_freeze_queue(q);
391368
if (val == 2) {
392369
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
@@ -399,7 +376,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
399376
blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
400377
}
401378
blk_mq_unfreeze_queue(q, memflags);
402-
mutex_unlock(&q->sysfs_lock);
403379
#endif
404380
return ret;
405381
}
@@ -417,30 +393,23 @@ static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
417393
ssize_t ret = count;
418394
struct request_queue *q = disk->queue;
419395

420-
mutex_lock(&q->sysfs_lock);
421396
memflags = blk_mq_freeze_queue(q);
422397
if (!(q->limits.features & BLK_FEAT_POLL)) {
423398
ret = -EINVAL;
424399
goto out;
425400
}
401+
426402
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
427403
pr_info_ratelimited("please use driver specific parameters instead.\n");
428404
out:
429405
blk_mq_unfreeze_queue(q, memflags);
430-
mutex_unlock(&q->sysfs_lock);
431-
432406
return ret;
433407
}
434408

435409
static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
436410
{
437-
ssize_t ret;
438-
439-
mutex_lock(&disk->queue->sysfs_lock);
440-
ret = sysfs_emit(page, "%u\n",
441-
jiffies_to_msecs(disk->queue->rq_timeout));
442-
mutex_unlock(&disk->queue->sysfs_lock);
443-
return ret;
411+
return sysfs_emit(page, "%u\n",
412+
jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout)));
444413
}
445414

446415
static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
@@ -454,11 +423,9 @@ static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
454423
if (err || val == 0)
455424
return -EINVAL;
456425

457-
mutex_lock(&q->sysfs_lock);
458426
memflags = blk_mq_freeze_queue(q);
459427
blk_queue_rq_timeout(q, msecs_to_jiffies(val));
460428
blk_mq_unfreeze_queue(q, memflags);
461-
mutex_unlock(&q->sysfs_lock);
462429

463430
return count;
464431
}
@@ -712,6 +679,10 @@ static struct attribute *queue_attrs[] = {
712679
* Attributes which are protected with q->sysfs_lock.
713680
*/
714681
&queue_ra_entry.attr,
682+
683+
/*
684+
* Attributes which don't require locking.
685+
*/
715686
&queue_discard_zeroes_data_entry.attr,
716687
&queue_write_same_max_entry.attr,
717688
&queue_nr_zones_entry.attr,
@@ -729,11 +700,15 @@ static struct attribute *blk_mq_queue_attrs[] = {
729700
*/
730701
&queue_requests_entry.attr,
731702
&elv_iosched_entry.attr,
732-
&queue_rq_affinity_entry.attr,
733-
&queue_io_timeout_entry.attr,
734703
#ifdef CONFIG_BLK_WBT
735704
&queue_wb_lat_entry.attr,
736705
#endif
706+
/*
707+
* Attributes which don't require locking.
708+
*/
709+
&queue_rq_affinity_entry.attr,
710+
&queue_io_timeout_entry.attr,
711+
737712
NULL,
738713
};
739714

0 commit comments

Comments
 (0)