Commit fe01891
aio: fix use-after-free due to missing POLLFREE handling
jira VULN-63551
cve CVE-2021-47505
commit-author Eric Biggers <ebiggers@google.com>
commit 50252e4

signalfd_poll() and binder_poll() are special in that they use a
waitqueue whose lifetime is the current task, rather than the struct
file as is normally the case.  This is okay for blocking polls, since a
blocking poll occurs within one task; however, non-blocking polls
require another solution.  This solution is for the queue to be cleared
before it is freed, by sending a POLLFREE notification to all waiters.

Unfortunately, only eventpoll handles POLLFREE.  A second type of
non-blocking poll, aio poll, was added in kernel v4.18, and it doesn't
handle POLLFREE.  This allows a use-after-free to occur if a signalfd
or binder fd is polled with aio poll, and the waitqueue gets freed.

Fix this by making aio poll handle POLLFREE.

A patch by Ramji Jiyani <ramjiyani@google.com>
(https://lore.kernel.org/r/20211027011834.2497484-1-ramjiyani@google.com)
tried to do this by making aio_poll_wake() always complete the request
inline if POLLFREE is seen.  However, that solution had two bugs.
First, it introduced a deadlock, as it unconditionally locked the aio
context while holding the waitqueue lock, which inverts the normal
locking order.  Second, it didn't consider that POLLFREE notifications
are missed while the request has been temporarily de-queued.

The second problem was solved by my previous patch.  This patch then
properly fixes the use-after-free by handling POLLFREE in a
deadlock-free way.  It does this by taking advantage of the fact that
freeing of the waitqueue is RCU-delayed, similar to what eventpoll
does.

Fixes: 2c14fa8 ("aio: implement IOCB_CMD_POLL")
Cc: <stable@vger.kernel.org> # v4.18+
Link: https://lore.kernel.org/r/20211209010455.42744-6-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
(cherry picked from commit 50252e4)
Signed-off-by: Brett Mastbergen <bmastbergen@ciq.com>
1 parent ca7a22d commit fe01891
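For context, the affected usage pattern is an IOCB_CMD_POLL aio request submitted against a signalfd (or binder fd), whose waitqueue is owned by the task rather than by the struct file. The sketch below is illustrative only; it is not part of this commit and not a reproducer for the bug. It uses the standard Linux AIO uapi (linux/aio_abi.h, io_setup/io_submit via syscall) and omits error handling for brevity.

/* Sketch only: aio poll on a signalfd, the usage pattern this fix protects. */
#define _GNU_SOURCE
#include <linux/aio_abi.h>
#include <sys/signalfd.h>
#include <sys/syscall.h>
#include <poll.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        aio_context_t ctx = 0;
        struct iocb cb;
        struct iocb *cbs[1] = { &cb };
        sigset_t mask;
        int sfd;

        /* Error checking omitted for brevity. */
        sigemptyset(&mask);
        sigaddset(&mask, SIGINT);
        sigprocmask(SIG_BLOCK, &mask, NULL);
        sfd = signalfd(-1, &mask, SFD_NONBLOCK);

        syscall(SYS_io_setup, 1, &ctx);

        memset(&cb, 0, sizeof(cb));
        cb.aio_fildes = sfd;
        cb.aio_lio_opcode = IOCB_CMD_POLL;
        cb.aio_buf = POLLIN;    /* poll events for IOCB_CMD_POLL */

        /*
         * This queues the request's wait entry on the signalfd waitqueue,
         * which is owned by the task's signal-handling state, not by the
         * file.  Before this fix, that waitqueue could be freed while the
         * aio request was still linked to it.
         */
        syscall(SYS_io_submit, ctx, 1, cbs);
        return 0;
}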

File tree

2 files changed: +107 -32 lines changed
fs/aio.c

Lines changed: 106 additions & 31 deletions
@@ -1624,6 +1624,51 @@ static void aio_poll_put_work(struct work_struct *work)
         iocb_put(iocb);
 }
 
+/*
+ * Safely lock the waitqueue which the request is on, synchronizing with the
+ * case where the ->poll() provider decides to free its waitqueue early.
+ *
+ * Returns true on success, meaning that req->head->lock was locked, req->wait
+ * is on req->head, and an RCU read lock was taken.  Returns false if the
+ * request was already removed from its waitqueue (which might no longer exist).
+ */
+static bool poll_iocb_lock_wq(struct poll_iocb *req)
+{
+        wait_queue_head_t *head;
+
+        /*
+         * While we hold the waitqueue lock and the waitqueue is nonempty,
+         * wake_up_pollfree() will wait for us.  However, taking the waitqueue
+         * lock in the first place can race with the waitqueue being freed.
+         *
+         * We solve this as eventpoll does: by taking advantage of the fact that
+         * all users of wake_up_pollfree() will RCU-delay the actual free.  If
+         * we enter rcu_read_lock() and see that the pointer to the queue is
+         * non-NULL, we can then lock it without the memory being freed out from
+         * under us, then check whether the request is still on the queue.
+         *
+         * Keep holding rcu_read_lock() as long as we hold the queue lock, in
+         * case the caller deletes the entry from the queue, leaving it empty.
+         * In that case, only RCU prevents the queue memory from being freed.
+         */
+        rcu_read_lock();
+        head = smp_load_acquire(&req->head);
+        if (head) {
+                spin_lock(&head->lock);
+                if (!list_empty(&req->wait.entry))
+                        return true;
+                spin_unlock(&head->lock);
+        }
+        rcu_read_unlock();
+        return false;
+}
+
+static void poll_iocb_unlock_wq(struct poll_iocb *req)
+{
+        spin_unlock(&req->head->lock);
+        rcu_read_unlock();
+}
+
 static void aio_poll_complete_work(struct work_struct *work)
 {
         struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1643,24 +1688,25 @@ static void aio_poll_complete_work(struct work_struct *work)
          * avoid further branches in the fast path.
          */
         spin_lock_irq(&ctx->ctx_lock);
-        spin_lock(&req->head->lock);
-        if (!mask && !READ_ONCE(req->cancelled)) {
-                /*
-                 * The request isn't actually ready to be completed yet.
-                 * Reschedule completion if another wakeup came in.
-                 */
-                if (req->work_need_resched) {
-                        schedule_work(&req->work);
-                        req->work_need_resched = false;
-                } else {
-                        req->work_scheduled = false;
+        if (poll_iocb_lock_wq(req)) {
+                if (!mask && !READ_ONCE(req->cancelled)) {
+                        /*
+                         * The request isn't actually ready to be completed yet.
+                         * Reschedule completion if another wakeup came in.
+                         */
+                        if (req->work_need_resched) {
+                                schedule_work(&req->work);
+                                req->work_need_resched = false;
+                        } else {
+                                req->work_scheduled = false;
+                        }
+                        poll_iocb_unlock_wq(req);
+                        spin_unlock_irq(&ctx->ctx_lock);
+                        return;
                 }
-                spin_unlock(&req->head->lock);
-                spin_unlock_irq(&ctx->ctx_lock);
-                return;
-        }
-        list_del_init(&req->wait.entry);
-        spin_unlock(&req->head->lock);
+                list_del_init(&req->wait.entry);
+                poll_iocb_unlock_wq(req);
+        } /* else, POLLFREE has freed the waitqueue, so we must complete */
         list_del_init(&iocb->ki_list);
         iocb->ki_res.res = mangle_poll(mask);
         spin_unlock_irq(&ctx->ctx_lock);
@@ -1674,13 +1720,14 @@ static int aio_poll_cancel(struct kiocb *iocb)
         struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
         struct poll_iocb *req = &aiocb->poll;
 
-        spin_lock(&req->head->lock);
-        WRITE_ONCE(req->cancelled, true);
-        if (!req->work_scheduled) {
-                schedule_work(&aiocb->poll.work);
-                req->work_scheduled = true;
-        }
-        spin_unlock(&req->head->lock);
+        if (poll_iocb_lock_wq(req)) {
+                WRITE_ONCE(req->cancelled, true);
+                if (!req->work_scheduled) {
+                        schedule_work(&aiocb->poll.work);
+                        req->work_scheduled = true;
+                }
+                poll_iocb_unlock_wq(req);
+        } /* else, the request was force-cancelled by POLLFREE already */
 
         return 0;
 }
@@ -1732,21 +1779,45 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
                  *
                  * Don't remove the request from the waitqueue here, as it might
                  * not actually be complete yet (we won't know until vfs_poll()
-                 * is called), and we must not miss any wakeups.
+                 * is called), and we must not miss any wakeups.  POLLFREE is an
+                 * exception to this; see below.
                  */
                 if (req->work_scheduled) {
                         req->work_need_resched = true;
                 } else {
                         schedule_work(&req->work);
                         req->work_scheduled = true;
                 }
+
+                /*
+                 * If the waitqueue is being freed early but we can't complete
+                 * the request inline, we have to tear down the request as best
+                 * we can.  That means immediately removing the request from its
+                 * waitqueue and preventing all further accesses to the
+                 * waitqueue via the request.  We also need to schedule the
+                 * completion work (done above).  Also mark the request as
+                 * cancelled, to potentially skip an unneeded call to ->poll().
+                 */
+                if (mask & POLLFREE) {
+                        WRITE_ONCE(req->cancelled, true);
+                        list_del_init(&req->wait.entry);
+
+                        /*
+                         * Careful: this *must* be the last step, since as soon
+                         * as req->head is NULL'ed out, the request can be
+                         * completed and freed, since aio_poll_complete_work()
+                         * will no longer need to take the waitqueue lock.
+                         */
+                        smp_store_release(&req->head, NULL);
+                }
         }
         return 1;
 }
 
 struct aio_poll_table {
         struct poll_table_struct pt;
         struct aio_kiocb *iocb;
+        bool queued;
         int error;
 };
 
@@ -1757,11 +1828,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
         struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
 
         /* multiple wait queues per file are not supported */
-        if (unlikely(pt->iocb->poll.head)) {
+        if (unlikely(pt->queued)) {
                 pt->error = -EINVAL;
                 return;
         }
 
+        pt->queued = true;
         pt->error = 0;
         pt->iocb->poll.head = head;
         add_wait_queue(head, &pt->iocb->poll.wait);
@@ -1793,6 +1865,7 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
         apt.pt._qproc = aio_poll_queue_proc;
         apt.pt._key = req->events;
         apt.iocb = aiocb;
+        apt.queued = false;
         apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
 
         /* initialized the list so that we can do list_empty checks */
@@ -1801,9 +1874,10 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 
         mask = vfs_poll(req->file, &apt.pt) & req->events;
         spin_lock_irq(&ctx->ctx_lock);
-        if (likely(req->head)) {
-                spin_lock(&req->head->lock);
-                if (list_empty(&req->wait.entry) || req->work_scheduled) {
+        if (likely(apt.queued)) {
+                bool on_queue = poll_iocb_lock_wq(req);
+
+                if (!on_queue || req->work_scheduled) {
                         /*
                          * aio_poll_wake() already either scheduled the async
                          * completion work, or completed the request inline.
@@ -1819,15 +1893,16 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
                 } else if (cancel) {
                         /* Cancel if possible (may be too late though). */
                         WRITE_ONCE(req->cancelled, true);
-                } else if (!list_empty(&req->wait.entry)) {
+                } else if (on_queue) {
                         /*
                          * Actually waiting for an event, so add the request to
                          * active_reqs so that it can be cancelled if needed.
                          */
                         list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
                         aiocb->ki_cancel = aio_poll_cancel;
                 }
-                spin_unlock(&req->head->lock);
+                if (on_queue)
+                        poll_iocb_unlock_wq(req);
         }
         if (mask) { /* no async, we'd stolen it */
                 aiocb->ki_res.res = mangle_poll(mask);

include/uapi/asm-generic/poll.h

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@
 #define POLLRDHUP 0x2000
 #endif
 
-#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */
+#define POLLFREE (__force __poll_t)0x4000
 
 #define POLL_BUSY_LOOP (__force __poll_t)0x8000
 
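One note on the design: poll_iocb_lock_wq() depends on the contract that any waitqueue owner calling wake_up_pollfree() also RCU-delays the actual free, the same assumption eventpoll relies on, and the reason the POLLFREE definition above is no longer marked as epoll-only. The sketch below is a generic, hypothetical illustration of that provider-side contract; the my_object type and its functions are invented for this example (this is not code from signalfd or binder), while wake_up_pollfree() and kfree_rcu() are real kernel APIs.

/* Hypothetical provider whose waitqueue can die before its struct file. */
#include <linux/wait.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_object {
        wait_queue_head_t wqh;          /* pollers park their wait entries here */
        struct rcu_head rcu;
};

static struct my_object *my_object_alloc(void)
{
        struct my_object *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

        if (obj)
                init_waitqueue_head(&obj->wqh);
        return obj;
}

static void my_object_destroy(struct my_object *obj)
{
        /*
         * Notify all waiters (eventpoll, and with this fix aio poll) that
         * the waitqueue is going away, so they unhook themselves now
         * instead of touching freed memory later.
         */
        wake_up_pollfree(&obj->wqh);

        /*
         * The free must be RCU-delayed: a racing poll_iocb_lock_wq() that
         * has already entered rcu_read_lock() and loaded req->head may
         * still take wqh.lock once more before noticing that its wait
         * entry has been removed.
         */
        kfree_rcu(obj, rcu);
}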