
Commit 12c4368

aio: keep poll requests on waitqueue until completed
jira VULN-63551
cve-pre CVE-2021-47505
commit-author Eric Biggers <ebiggers@google.com>
commit 363bee2

Currently, aio_poll_wake() will always remove the poll request from the
waitqueue. Then, if aio_poll_complete_work() sees that none of the polled
events are ready and the request isn't cancelled, it re-adds the request to
the waitqueue. (This can easily happen when polling a file that doesn't pass
an event mask when waking up its waitqueue.)

This is fundamentally broken for two reasons:

  1. If a wakeup occurs between vfs_poll() and the request being re-added to
     the waitqueue, it will be missed because the request wasn't on the
     waitqueue at the time. Therefore, IOCB_CMD_POLL might never complete
     even if the polled file is ready.

  2. When the request isn't on the waitqueue, there is no way to be notified
     that the waitqueue is being freed (which happens when its lifetime is
     shorter than the struct file's). This is supposed to happen via the
     waitqueue entries being woken up with POLLFREE.

Therefore, leave the requests on the waitqueue until they are actually
completed (or cancelled). To keep track of when aio_poll_complete_work needs
to be scheduled, use new fields in struct poll_iocb. Remove the 'done' field
which is now redundant.

Note that this is consistent with how sys_poll() and eventpoll work; their
wakeup functions do *not* remove the waitqueue entries.

Fixes: 2c14fa8 ("aio: implement IOCB_CMD_POLL")
Cc: <stable@vger.kernel.org> # v4.18+
Link: https://lore.kernel.org/r/20211209010455.42744-5-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
(cherry picked from commit 363bee2)
Signed-off-by: Brett Mastbergen <bmastbergen@ciq.com>
1 parent 217ea6f commit 12c4368
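
For context, the race described above affects IOCB_CMD_POLL requests submitted
through the Linux AIO interface. Below is a minimal, hypothetical userspace
sketch (not part of this commit) of submitting such a poll request with the raw
io_setup()/io_submit()/io_getevents() syscalls, which have no glibc wrappers.
Without the fix, a wakeup arriving while the request is briefly off the
waitqueue can be lost and the io_getevents() call may never return.

/* Illustrative only: poll stdin for POLLIN via IOCB_CMD_POLL. */
#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <poll.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
        aio_context_t ctx = 0;
        struct iocb cb;
        struct iocb *cbs[1] = { &cb };
        struct io_event ev;

        if (syscall(SYS_io_setup, 1, &ctx) < 0)
                return 1;

        memset(&cb, 0, sizeof(cb));
        cb.aio_lio_opcode = IOCB_CMD_POLL;   /* the opcode this patch fixes */
        cb.aio_fildes = 0;                   /* poll stdin */
        cb.aio_buf = POLLIN;                 /* requested event mask */

        if (syscall(SYS_io_submit, ctx, 1, cbs) != 1)
                return 1;

        /* Blocks until the poll request completes. */
        if (syscall(SYS_io_getevents, ctx, 1, 1, &ev, NULL) == 1)
                printf("poll result: 0x%llx\n", (unsigned long long)ev.res);

        syscall(SYS_io_destroy, ctx);
        return 0;
}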

File tree: 1 file changed (+63 −20 lines)
fs/aio.c

Lines changed: 63 additions & 20 deletions
@@ -181,8 +181,9 @@ struct poll_iocb {
        struct file             *file;
        struct wait_queue_head  *head;
        __poll_t                events;
-       bool                    done;
        bool                    cancelled;
+       bool                    work_scheduled;
+       bool                    work_need_resched;
        struct wait_queue_entry wait;
        struct work_struct      work;
 };
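
For reference, after this hunk struct poll_iocb looks roughly as follows
(reconstructed from the context and added lines above; the field comments are
annotations, not part of the commit):

struct poll_iocb {
        struct file             *file;
        struct wait_queue_head  *head;
        __poll_t                events;
        bool                    cancelled;          /* cancellation requested */
        bool                    work_scheduled;     /* completion work is queued */
        bool                    work_need_resched;  /* another wakeup arrived while queued */
        struct wait_queue_entry wait;
        struct work_struct      work;
};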
@@ -1642,14 +1643,26 @@ static void aio_poll_complete_work(struct work_struct *work)
         * avoid further branches in the fast path.
         */
        spin_lock_irq(&ctx->ctx_lock);
+       spin_lock(&req->head->lock);
        if (!mask && !READ_ONCE(req->cancelled)) {
-               add_wait_queue(req->head, &req->wait);
+               /*
+                * The request isn't actually ready to be completed yet.
+                * Reschedule completion if another wakeup came in.
+                */
+               if (req->work_need_resched) {
+                       schedule_work(&req->work);
+                       req->work_need_resched = false;
+               } else {
+                       req->work_scheduled = false;
+               }
+               spin_unlock(&req->head->lock);
                spin_unlock_irq(&ctx->ctx_lock);
                return;
        }
+       list_del_init(&req->wait.entry);
+       spin_unlock(&req->head->lock);
        list_del_init(&iocb->ki_list);
        iocb->ki_res.res = mangle_poll(mask);
-       req->done = true;
        spin_unlock_irq(&ctx->ctx_lock);

        iocb_put(iocb);
@@ -1663,9 +1676,9 @@ static int aio_poll_cancel(struct kiocb *iocb)

        spin_lock(&req->head->lock);
        WRITE_ONCE(req->cancelled, true);
-       if (!list_empty(&req->wait.entry)) {
-               list_del_init(&req->wait.entry);
+       if (!req->work_scheduled) {
                schedule_work(&aiocb->poll.work);
+               req->work_scheduled = true;
        }
        spin_unlock(&req->head->lock);

@@ -1684,20 +1697,26 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
        if (mask && !(mask & req->events))
                return 0;

-       list_del_init(&req->wait.entry);
-
-       if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
+       /*
+        * Complete the request inline if possible. This requires that three
+        * conditions be met:
+        *   1. An event mask must have been passed. If a plain wakeup was done
+        *      instead, then mask == 0 and we have to call vfs_poll() to get
+        *      the events, so inline completion isn't possible.
+        *   2. The completion work must not have already been scheduled.
+        *   3. ctx_lock must not be busy. We have to use trylock because we
+        *      already hold the waitqueue lock, so this inverts the normal
+        *      locking order. Use irqsave/irqrestore because not all
+        *      filesystems (e.g. fuse) call this function with IRQs disabled,
+        *      yet IRQs have to be disabled before ctx_lock is obtained.
+        */
+       if (mask && !req->work_scheduled &&
+           spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
                struct kioctx *ctx = iocb->ki_ctx;

-               /*
-                * Try to complete the iocb inline if we can. Use
-                * irqsave/irqrestore because not all filesystems (e.g. fuse)
-                * call this function with IRQs disabled and because IRQs
-                * have to be disabled before ctx_lock is obtained.
-                */
+               list_del_init(&req->wait.entry);
                list_del(&iocb->ki_list);
                iocb->ki_res.res = mangle_poll(mask);
-               req->done = true;
                if (iocb->ki_eventfd && eventfd_signal_allowed()) {
                        iocb = NULL;
                        INIT_WORK(&req->work, aio_poll_put_work);
@@ -1707,7 +1726,20 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
                if (iocb)
                        iocb_put(iocb);
        } else {
-               schedule_work(&req->work);
+               /*
+                * Schedule the completion work if needed. If it was already
+                * scheduled, record that another wakeup came in.
+                *
+                * Don't remove the request from the waitqueue here, as it might
+                * not actually be complete yet (we won't know until vfs_poll()
+                * is called), and we must not miss any wakeups.
+                */
+               if (req->work_scheduled) {
+                       req->work_need_resched = true;
+               } else {
+                       schedule_work(&req->work);
+                       req->work_scheduled = true;
+               }
        }
        return 1;
 }
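
The work_scheduled/work_need_resched pair used in the hunks above implements a
"coalesce wakeups into a single work item" pattern. The following
self-contained sketch (hypothetical names, not code from this commit) shows
the same idea in isolation:

/* Generic sketch of the coalescing pattern; names are hypothetical. */
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct coalesced_event {
        spinlock_t lock;
        bool work_scheduled;
        bool work_need_resched;
        struct work_struct work;
};

/* Event source: schedule the handler once; remember any extra events. */
static void event_arrived(struct coalesced_event *ev)
{
        unsigned long flags;

        spin_lock_irqsave(&ev->lock, flags);
        if (ev->work_scheduled) {
                ev->work_need_resched = true;   /* handler will re-run */
        } else {
                schedule_work(&ev->work);
                ev->work_scheduled = true;
        }
        spin_unlock_irqrestore(&ev->lock, flags);
}

/* Handler: if more events arrived while we ran, go around again. */
static void event_work(struct work_struct *work)
{
        struct coalesced_event *ev = container_of(work, struct coalesced_event, work);
        unsigned long flags;

        /* ... process the event here ... */

        spin_lock_irqsave(&ev->lock, flags);
        if (ev->work_need_resched) {
                ev->work_need_resched = false;
                schedule_work(&ev->work);       /* stay "scheduled" */
        } else {
                ev->work_scheduled = false;
        }
        spin_unlock_irqrestore(&ev->lock, flags);
}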
@@ -1754,8 +1786,9 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
        req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;

        req->head = NULL;
-       req->done = false;
        req->cancelled = false;
+       req->work_scheduled = false;
+       req->work_need_resched = false;

        apt.pt._qproc = aio_poll_queue_proc;
        apt.pt._key = req->events;
@@ -1770,17 +1803,27 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
        spin_lock_irq(&ctx->ctx_lock);
        if (likely(req->head)) {
                spin_lock(&req->head->lock);
-               if (unlikely(list_empty(&req->wait.entry))) {
-                       if (apt.error)
+               if (list_empty(&req->wait.entry) || req->work_scheduled) {
+                       /*
+                        * aio_poll_wake() already either scheduled the async
+                        * completion work, or completed the request inline.
+                        */
+                       if (apt.error) /* unsupported case: multiple queues */
                                cancel = true;
                        apt.error = 0;
                        mask = 0;
                }
                if (mask || apt.error) {
+                       /* Steal to complete synchronously. */
                        list_del_init(&req->wait.entry);
                } else if (cancel) {
+                       /* Cancel if possible (may be too late though). */
                        WRITE_ONCE(req->cancelled, true);
-               } else if (!req->done) { /* actually waiting for an event */
+               } else if (!list_empty(&req->wait.entry)) {
+                       /*
+                        * Actually waiting for an event, so add the request to
+                        * active_reqs so that it can be cancelled if needed.
+                        */
                        list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
                        aiocb->ki_cancel = aio_poll_cancel;
                }
