Commit 4d8ac48

sbitmap: fix batching wakeup
jira LE-4034
Rebuild_History Non-Buildable kernel-4.18.0-553.72.1.el8_10
commit-author David Jeffery <djeffery@redhat.com>
commit 1063973
Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-4.18.0-553.72.1.el8_10/10639737.failed

Current code supposes that it is enough to provide forward progress by just waking up one wait queue after one completion batch is done.

Unfortunately this isn't enough, because a waiter can be added to a wait queue just after it is woken up.

Here is one example (depth 64, wake_batch 8):

1) all 64 tags are active

2) each wait queue holds exactly one waiter

3) each completion batch (8 completions) wakes up just one waiter from one wait queue, and immediately one new sleeper is added to that wait queue

4) after 64 completions, 8 waiters have been woken up, yet the wait queues still hold 8 waiters

5) after another 8 active tags complete, only one waiter can be woken up; the other 7 can never be woken up again.

It turns out this problem isn't easy to fix, so simply wake up enough waiters for a single batch.

Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Chengming Zhou <zhouchengming@bytedance.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20230721095715.232728-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit 1063973)
Signed-off-by: Jonathan Maple <jmaple@ciq.com>

# Conflicts:
#	lib/sbitmap.c
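To make the counting in the numbered example above concrete, here is a small stand-alone model (hypothetical, written for this page; it is not part of the commit or of lib/sbitmap.c) of the same scenario: 8 wait queues, wake_batch of 8, one waiter per queue, and a new sleeper arriving right behind every wakeup.

/* Hypothetical model of the starvation example: 64 tags, 8 wait
 * queues, wake_batch = 8. Each completion batch wakes one waiter
 * from one queue (round-robin) and a new sleeper immediately
 * takes its place on that queue. */
#include <stdio.h>

#define QUEUES 8

int main(void)
{
    int sleepers[QUEUES];
    int i, batch, wake_index = 0, woken = 0;

    for (i = 0; i < QUEUES; i++)
        sleepers[i] = 1;                /* step 2: one waiter per queue */

    for (batch = 0; batch < 8; batch++) {   /* 64 completions = 8 batches */
        sleepers[wake_index]--;         /* step 3: one wakeup per batch */
        woken++;
        sleepers[wake_index]++;         /* new sleeper arrives at once */
        wake_index = (wake_index + 1) % QUEUES;
    }

    int left = 0;
    for (i = 0; i < QUEUES; i++)
        left += sleepers[i];

    /* Step 4: 8 woken, 8 still asleep. Only the 8 tags held by the
     * woken waiters are still active, so exactly one more batch (one
     * more wakeup) will ever arrive: 7 waiters are stranded (step 5). */
    printf("woken: %d, still sleeping: %d, wakeup batches left: 1\n",
           woken, left);
    return 0;
}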
1 parent 945505f commit 4d8ac48

File tree

1 file changed: +194 −0 lines changed

Lines changed: 194 additions & 0 deletions
@@ -0,0 +1,194 @@
sbitmap: fix batching wakeup

jira LE-4034
Rebuild_History Non-Buildable kernel-4.18.0-553.72.1.el8_10
commit-author David Jeffery <djeffery@redhat.com>
commit 106397376c0369fcc01c58dd189ff925a2724a57
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-4.18.0-553.72.1.el8_10/10639737.failed

Current code supposes that it is enough to provide forward progress by
just waking up one wait queue after one completion batch is done.

Unfortunately this way isn't enough, cause waiter can be added to wait
queue just after it is woken up.

Follows one example(64 depth, wake_batch is 8)

1) all 64 tags are active

2) in each wait queue, there is only one single waiter

3) each time one completion batch(8 completions) wakes up just one
waiter in each wait queue, then immediately one new sleeper is added
to this wait queue

4) after 64 completions, 8 waiters are wakeup, and there are still 8
waiters in each wait queue

5) after another 8 active tags are completed, only one waiter can be
wakeup, and the other 7 can't be waken up anymore.

Turns out it isn't easy to fix this problem, so simply wakeup enough
waiters for single batch.

Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Chengming Zhou <zhouchengming@bytedance.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20230721095715.232728-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit 106397376c0369fcc01c58dd189ff925a2724a57)
Signed-off-by: Jonathan Maple <jmaple@ciq.com>

# Conflicts:
#	lib/sbitmap.c
diff --cc lib/sbitmap.c
index 082298015b44,d0a5081dfd12..000000000000
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@@ -566,106 -548,55 +566,115 @@@ void sbitmap_queue_min_shallow_depth(st
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);

-static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
+static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 {
-    int i, wake_index;
+    int i, wake_index, woken;

     if (!atomic_read(&sbq->ws_active))
-        return;
+        return NULL;

     wake_index = atomic_read(&sbq->wake_index);
     for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
         struct sbq_wait_state *ws = &sbq->ws[wake_index];

-        /*
-         * Advance the index before checking the current queue.
-         * It improves fairness, by ensuring the queue doesn't
-         * need to be fully emptied before trying to wake up
-         * from the next one.
-         */
-        wake_index = sbq_index_inc(wake_index);
+        if (waitqueue_active(&ws->wait) && atomic_read(&ws->wait_cnt)) {
+            if (wake_index != atomic_read(&sbq->wake_index))
+                atomic_set(&sbq->wake_index, wake_index);
+            return ws;
+        }

++<<<<<<< HEAD
+        wake_index = sbq_index_inc(wake_index);
++=======
+        if (waitqueue_active(&ws->wait)) {
+            woken = wake_up_nr(&ws->wait, nr);
+            if (woken == nr)
+                break;
+            nr -= woken;
+        }
++>>>>>>> 106397376c03 (sbitmap: fix batching wakeup)
     }

-    if (wake_index != atomic_read(&sbq->wake_index))
-        atomic_set(&sbq->wake_index, wake_index);
+    return NULL;
 }

-void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
+static bool __sbq_wake_up(struct sbitmap_queue *sbq, int *nr)
 {
-    unsigned int wake_batch = READ_ONCE(sbq->wake_batch);
-    unsigned int wakeups;
+    struct sbq_wait_state *ws;
+    unsigned int wake_batch;
+    int wait_cnt, cur, sub;
+    bool ret;

-    if (!atomic_read(&sbq->ws_active))
-        return;
+    if (*nr <= 0)
+        return false;

-    atomic_add(nr, &sbq->completion_cnt);
-    wakeups = atomic_read(&sbq->wakeup_cnt);
+    ws = sbq_wake_ptr(sbq);
+    if (!ws)
+        return false;

+    cur = atomic_read(&ws->wait_cnt);
     do {
-        if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch)
-            return;
-    } while (!atomic_try_cmpxchg(&sbq->wakeup_cnt,
-                                 &wakeups, wakeups + wake_batch));
+        /*
+         * For concurrent callers of this, callers should call this
+         * function again to wakeup a new batch on a different 'ws'.
+         */
+        if (cur == 0)
+            return true;
+        sub = min(*nr, cur);
+        wait_cnt = cur - sub;
+    } while (!atomic_try_cmpxchg(&ws->wait_cnt, &cur, wait_cnt));
+
+    /*
+     * If we decremented queue without waiters, retry to avoid lost
+     * wakeups.
+     */
+    if (wait_cnt > 0)
+        return !waitqueue_active(&ws->wait);
+
+    *nr -= sub;
+
+    /*
+     * When wait_cnt == 0, we have to be particularly careful as we are
+     * responsible to reset wait_cnt regardless whether we've actually
+     * woken up anybody. But in case we didn't wakeup anybody, we still
+     * need to retry.
+     */
+    ret = !waitqueue_active(&ws->wait);
+    wake_batch = READ_ONCE(sbq->wake_batch);
+
+    /*
+     * Wake up first in case that concurrent callers decrease wait_cnt
+     * while waitqueue is empty.
+     */
+    wake_up_nr(&ws->wait, wake_batch);
+
+    /*
+     * Pairs with the memory barrier in sbitmap_queue_resize() to
+     * ensure that we see the batch size update before the wait
+     * count is reset.
+     *
+     * Also pairs with the implicit barrier between decrementing wait_cnt
+     * and checking for waitqueue_active() to make sure waitqueue_active()
+     * sees result of the wakeup if atomic_dec_return() has seen the result
+     * of atomic_set().
+     */
+    smp_mb__before_atomic();
+
+    /*
+     * Increase wake_index before updating wait_cnt, otherwise concurrent
+     * callers can see valid wait_cnt in old waitqueue, which can cause
+     * invalid wakeup on the old waitqueue.
+     */
+    sbq_index_atomic_inc(&sbq->wake_index);
+    atomic_set(&ws->wait_cnt, wake_batch);
+
+    return ret || *nr;
+}

-    __sbitmap_queue_wake_up(sbq, wake_batch);
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
+{
+    while (__sbq_wake_up(sbq, &nr))
+        ;
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);

* Unmerged path lib/sbitmap.c
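For reference, the fragments of the conflicted hunk can be stitched back together to show what the upstream ("=======", commit 106397376c03) side of __sbitmap_queue_wake_up() looks like. This is a sketch assembled from the hunk above for readability, not the recorded conflict resolution; the backport still has to reconcile it with the HEAD side's sbq_wake_ptr()/wait_cnt scheme.

static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
{
    int i, wake_index, woken;

    if (!atomic_read(&sbq->ws_active))
        return;

    wake_index = atomic_read(&sbq->wake_index);
    for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
        struct sbq_wait_state *ws = &sbq->ws[wake_index];

        /*
         * Advance the index before checking the current queue.
         * It improves fairness, by ensuring the queue doesn't
         * need to be fully emptied before trying to wake up
         * from the next one.
         */
        wake_index = sbq_index_inc(wake_index);

        /* Deliver as many of the nr batched wakeups as this queue
         * holds; carry the remainder over to the next queue. */
        if (waitqueue_active(&ws->wait)) {
            woken = wake_up_nr(&ws->wait, nr);
            if (woken == nr)
                break;
            nr -= woken;
        }
    }

    if (wake_index != atomic_read(&sbq->wake_index))
        atomic_set(&sbq->wake_index, wake_index);
}

The key behavioral change is visible in the loop: instead of waking a single waiter per completion batch and stopping, it keeps calling wake_up_nr() across the wait queues until all nr wakeups of the batch have been delivered, which is exactly the "wakeup enough waiters for single batch" fix the commit message describes.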
