Skip to content

Commit 91ad7e2

Browse files
committed
Merge: blk-mq: fix scsi lockup
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/1569

blk-mq: fix scsi lockup

JIRA: https://issues.redhat.com/browse/RHEL-120078
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Approved-by: Jeff Moyer <jmoyer@redhat.com>
Approved-by: Ewan D. Milne <emilne@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Scott Weaver <scweaver@redhat.com>
2 parents def8a69 + 8f0a42e commit 91ad7e2

File tree

6 files changed

+91
-55
lines changed

6 files changed

+91
-55
lines changed

block/blk-mq-sysfs.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
3434
struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
3535
kobj);
3636

37-
blk_free_flush_queue(hctx->fq);
3837
sbitmap_free(&hctx->ctx_map);
3938
free_cpumask_var(hctx->cpumask);
4039
kfree(hctx->ctxs);

block/blk-mq-tag.c

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
*/
99
#include <linux/kernel.h>
1010
#include <linux/module.h>
11+
#include <linux/slab.h>
12+
#include <linux/mm.h>
13+
#include <linux/kmemleak.h>
1114

1215
#include <linux/delay.h>
1316
#include "blk.h"
@@ -253,13 +256,10 @@ static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
253256
unsigned int bitnr)
254257
{
255258
struct request *rq;
256-
unsigned long flags;
257259

258-
spin_lock_irqsave(&tags->lock, flags);
259260
rq = tags->rqs[bitnr];
260261
if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
261262
rq = NULL;
262-
spin_unlock_irqrestore(&tags->lock, flags);
263263
return rq;
264264
}
265265

@@ -437,7 +437,9 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
437437
busy_tag_iter_fn *fn, void *priv)
438438
{
439439
unsigned int flags = tagset->flags;
440-
int i, nr_tags;
440+
int i, nr_tags, srcu_idx;
441+
442+
srcu_idx = srcu_read_lock(&tagset->tags_srcu);
441443

442444
nr_tags = blk_mq_is_shared_tags(flags) ? 1 : tagset->nr_hw_queues;
443445

@@ -446,6 +448,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
446448
__blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
447449
BT_TAG_ITER_STARTED);
448450
}
451+
srcu_read_unlock(&tagset->tags_srcu, srcu_idx);
449452
}
450453
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
451454

@@ -496,6 +499,8 @@ EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
496499
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
497500
void *priv)
498501
{
502+
int srcu_idx;
503+
499504
/*
500505
* __blk_mq_update_nr_hw_queues() updates nr_hw_queues and hctx_table
501506
* while the queue is frozen. So we can use q_usage_counter to avoid
@@ -504,6 +509,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
504509
if (!percpu_ref_tryget(&q->q_usage_counter))
505510
return;
506511

512+
srcu_idx = srcu_read_lock(&q->tag_set->tags_srcu);
507513
if (blk_mq_is_shared_tags(q->tag_set->flags)) {
508514
struct blk_mq_tags *tags = q->tag_set->shared_tags;
509515
struct sbitmap_queue *bresv = &tags->breserved_tags;
@@ -533,6 +539,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
533539
bt_for_each(hctx, q, btags, fn, priv, false);
534540
}
535541
}
542+
srcu_read_unlock(&q->tag_set->tags_srcu, srcu_idx);
536543
blk_queue_exit(q);
537544
}
538545

@@ -562,6 +569,8 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
562569
tags->nr_tags = total_tags;
563570
tags->nr_reserved_tags = reserved_tags;
564571
spin_lock_init(&tags->lock);
572+
INIT_LIST_HEAD(&tags->page_list);
573+
565574
if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
566575
goto out_free_tags;
567576
if (bt_alloc(&tags->breserved_tags, reserved_tags, round_robin, node))
@@ -576,11 +585,37 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
576585
return NULL;
577586
}
578587

579-
void blk_mq_free_tags(struct blk_mq_tags *tags)
588+
static void blk_mq_free_tags_callback(struct rcu_head *head)
589+
{
590+
struct blk_mq_tags *tags = container_of(head, struct blk_mq_tags,
591+
rcu_head);
592+
struct page *page;
593+
594+
while (!list_empty(&tags->page_list)) {
595+
page = list_first_entry(&tags->page_list, struct page, lru);
596+
list_del_init(&page->lru);
597+
/*
598+
* Remove kmemleak object previously allocated in
599+
* blk_mq_alloc_rqs().
600+
*/
601+
kmemleak_free(page_address(page));
602+
__free_pages(page, page->private);
603+
}
604+
kfree(tags);
605+
}
606+
607+
void blk_mq_free_tags(struct blk_mq_tag_set *set, struct blk_mq_tags *tags)
580608
{
581609
sbitmap_queue_free(&tags->bitmap_tags);
582610
sbitmap_queue_free(&tags->breserved_tags);
583-
kfree(tags);
611+
612+
/* if tags pages is not allocated yet, free tags directly */
613+
if (list_empty(&tags->page_list)) {
614+
kfree(tags);
615+
return;
616+
}
617+
618+
call_srcu(&set->tags_srcu, &tags->rcu_head, blk_mq_free_tags_callback);
584619
}
585620

586621
int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,

block/blk-mq.c

Lines changed: 43 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -3413,7 +3413,6 @@ static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags,
34133413
struct blk_mq_tags *tags)
34143414
{
34153415
struct page *page;
3416-
unsigned long flags;
34173416

34183417
/*
34193418
* There is no need to clear mapping if driver tags is not initialized
@@ -3437,22 +3436,12 @@ static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags,
34373436
}
34383437
}
34393438
}
3440-
3441-
/*
3442-
* Wait until all pending iteration is done.
3443-
*
3444-
* Request reference is cleared and it is guaranteed to be observed
3445-
* after the ->lock is released.
3446-
*/
3447-
spin_lock_irqsave(&drv_tags->lock, flags);
3448-
spin_unlock_irqrestore(&drv_tags->lock, flags);
34493439
}
34503440

34513441
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
34523442
unsigned int hctx_idx)
34533443
{
34543444
struct blk_mq_tags *drv_tags;
3455-
struct page *page;
34563445

34573446
if (list_empty(&tags->page_list))
34583447
return;
@@ -3476,27 +3465,20 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
34763465
}
34773466

34783467
blk_mq_clear_rq_mapping(drv_tags, tags);
3479-
3480-
while (!list_empty(&tags->page_list)) {
3481-
page = list_first_entry(&tags->page_list, struct page, lru);
3482-
list_del_init(&page->lru);
3483-
/*
3484-
* Remove kmemleak object previously allocated in
3485-
* blk_mq_alloc_rqs().
3486-
*/
3487-
kmemleak_free(page_address(page));
3488-
__free_pages(page, page->private);
3489-
}
3468+
/*
3469+
* Free request pages in SRCU callback, which is called from
3470+
* blk_mq_free_tags().
3471+
*/
34903472
}
34913473

3492-
void blk_mq_free_rq_map(struct blk_mq_tags *tags)
3474+
void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags)
34933475
{
34943476
kfree(tags->rqs);
34953477
tags->rqs = NULL;
34963478
kfree(tags->static_rqs);
34973479
tags->static_rqs = NULL;
34983480

3499-
blk_mq_free_tags(tags);
3481+
blk_mq_free_tags(set, tags);
35003482
}
35013483

35023484
static enum hctx_type hctx_idx_to_type(struct blk_mq_tag_set *set,
@@ -3558,7 +3540,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
35583540
err_free_rqs:
35593541
kfree(tags->rqs);
35603542
err_free_tags:
3561-
blk_mq_free_tags(tags);
3543+
blk_mq_free_tags(set, tags);
35623544
return NULL;
35633545
}
35643546

@@ -3588,8 +3570,6 @@ static int blk_mq_alloc_rqs(struct blk_mq_tag_set *set,
35883570
if (node == NUMA_NO_NODE)
35893571
node = set->numa_node;
35903572

3591-
INIT_LIST_HEAD(&tags->page_list);
3592-
35933573
/*
35943574
* rq_size is the size of the request plus driver payload, rounded
35953575
* to the cacheline size
@@ -3676,8 +3656,12 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
36763656
struct rq_iter_data data = {
36773657
.hctx = hctx,
36783658
};
3659+
int srcu_idx;
36793660

3661+
srcu_idx = srcu_read_lock(&hctx->queue->tag_set->tags_srcu);
36803662
blk_mq_all_tag_iter(tags, blk_mq_has_request, &data);
3663+
srcu_read_unlock(&hctx->queue->tag_set->tags_srcu, srcu_idx);
3664+
36813665
return data.has_rq;
36823666
}
36833667

@@ -3897,7 +3881,6 @@ static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
38973881
unsigned int queue_depth, struct request *flush_rq)
38983882
{
38993883
int i;
3900-
unsigned long flags;
39013884

39023885
/* The hw queue may not be mapped yet */
39033886
if (!tags)
@@ -3907,15 +3890,14 @@ static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
39073890

39083891
for (i = 0; i < queue_depth; i++)
39093892
cmpxchg(&tags->rqs[i], flush_rq, NULL);
3893+
}
39103894

3911-
/*
3912-
* Wait until all pending iteration is done.
3913-
*
3914-
* Request reference is cleared and it is guaranteed to be observed
3915-
* after the ->lock is released.
3916-
*/
3917-
spin_lock_irqsave(&tags->lock, flags);
3918-
spin_unlock_irqrestore(&tags->lock, flags);
3895+
static void blk_free_flush_queue_callback(struct rcu_head *head)
3896+
{
3897+
struct blk_flush_queue *fq =
3898+
container_of(head, struct blk_flush_queue, rcu_head);
3899+
3900+
blk_free_flush_queue(fq);
39193901
}
39203902

39213903
/* hctx->ctxs will be freed in queue's release handler */
@@ -3937,6 +3919,10 @@ static void blk_mq_exit_hctx(struct request_queue *q,
39373919
if (set->ops->exit_hctx)
39383920
set->ops->exit_hctx(hctx, hctx_idx);
39393921

3922+
call_srcu(&set->tags_srcu, &hctx->fq->rcu_head,
3923+
blk_free_flush_queue_callback);
3924+
hctx->fq = NULL;
3925+
39403926
xa_erase(&q->hctx_table, hctx_idx);
39413927

39423928
spin_lock(&q->unused_hctx_lock);
@@ -3962,13 +3948,19 @@ static int blk_mq_init_hctx(struct request_queue *q,
39623948
struct blk_mq_tag_set *set,
39633949
struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
39643950
{
3951+
gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
3952+
3953+
hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp);
3954+
if (!hctx->fq)
3955+
goto fail;
3956+
39653957
hctx->queue_num = hctx_idx;
39663958

39673959
hctx->tags = set->tags[hctx_idx];
39683960

39693961
if (set->ops->init_hctx &&
39703962
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
3971-
goto fail;
3963+
goto fail_free_fq;
39723964

39733965
if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx,
39743966
hctx->numa_node))
@@ -3985,6 +3977,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
39853977
exit_hctx:
39863978
if (set->ops->exit_hctx)
39873979
set->ops->exit_hctx(hctx, hctx_idx);
3980+
fail_free_fq:
3981+
blk_free_flush_queue(hctx->fq);
3982+
hctx->fq = NULL;
39883983
fail:
39893984
return -1;
39903985
}
@@ -4036,16 +4031,10 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
40364031
init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
40374032
INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
40384033

4039-
hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp);
4040-
if (!hctx->fq)
4041-
goto free_bitmap;
4042-
40434034
blk_mq_hctx_kobj_init(hctx);
40444035

40454036
return hctx;
40464037

4047-
free_bitmap:
4048-
sbitmap_free(&hctx->ctx_map);
40494038
free_ctxs:
40504039
kfree(hctx->ctxs);
40514040
free_cpumask:
@@ -4099,7 +4088,7 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
40994088

41004089
ret = blk_mq_alloc_rqs(set, tags, hctx_idx, depth);
41014090
if (ret) {
4102-
blk_mq_free_rq_map(tags);
4091+
blk_mq_free_rq_map(set, tags);
41034092
return NULL;
41044093
}
41054094

@@ -4127,7 +4116,7 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
41274116
{
41284117
if (tags) {
41294118
blk_mq_free_rqs(set, tags, hctx_idx);
4130-
blk_mq_free_rq_map(tags);
4119+
blk_mq_free_rq_map(set, tags);
41314120
}
41324121
}
41334122

@@ -4826,6 +4815,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
48264815
if (ret)
48274816
goto out_free_srcu;
48284817
}
4818+
ret = init_srcu_struct(&set->tags_srcu);
4819+
if (ret)
4820+
goto out_cleanup_srcu;
48294821

48304822
init_rwsem(&set->update_nr_hwq_lock);
48314823

@@ -4834,7 +4826,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
48344826
sizeof(struct blk_mq_tags *), GFP_KERNEL,
48354827
set->numa_node);
48364828
if (!set->tags)
4837-
goto out_cleanup_srcu;
4829+
goto out_cleanup_tags_srcu;
48384830

48394831
for (i = 0; i < set->nr_maps; i++) {
48404832
set->map[i].mq_map = kcalloc_node(nr_cpu_ids,
@@ -4863,6 +4855,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
48634855
}
48644856
kfree(set->tags);
48654857
set->tags = NULL;
4858+
out_cleanup_tags_srcu:
4859+
cleanup_srcu_struct(&set->tags_srcu);
48664860
out_cleanup_srcu:
48674861
if (set->flags & BLK_MQ_F_BLOCKING)
48684862
cleanup_srcu_struct(set->srcu);
@@ -4908,6 +4902,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
49084902

49094903
kfree(set->tags);
49104904
set->tags = NULL;
4905+
4906+
srcu_barrier(&set->tags_srcu);
4907+
cleanup_srcu_struct(&set->tags_srcu);
49114908
if (set->flags & BLK_MQ_F_BLOCKING) {
49124909
cleanup_srcu_struct(set->srcu);
49134910
kfree(set->srcu);

block/blk-mq.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ void blk_mq_put_rq_ref(struct request *rq);
5959
*/
6060
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
6161
unsigned int hctx_idx);
62-
void blk_mq_free_rq_map(struct blk_mq_tags *tags);
62+
void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags);
6363
struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
6464
unsigned int hctx_idx, unsigned int depth);
6565
void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
@@ -162,7 +162,7 @@ struct blk_mq_alloc_data {
162162

163163
struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
164164
unsigned int reserved_tags, unsigned int flags, int node);
165-
void blk_mq_free_tags(struct blk_mq_tags *tags);
165+
void blk_mq_free_tags(struct blk_mq_tag_set *set, struct blk_mq_tags *tags);
166166

167167
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
168168
unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,

block/blk.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ struct blk_flush_queue {
3232
struct list_head flush_queue[2];
3333
unsigned long flush_data_in_flight;
3434
struct request *flush_rq;
35+
struct rcu_head rcu_head;
3536
};
3637

3738
bool is_flush_rq(struct request *req);

0 commit comments

Comments
 (0)