|
| 1 | +net/mlx5: Add a timeout to acquire the command queue semaphore |
| 2 | + |
| 3 | +jira LE-1907 |
| 4 | +cve CVE-2024-38556 |
| 5 | +Rebuild_History Non-Buildable kernel-5.14.0-427.40.1.el9_4 |
| 6 | +commit-author Akiva Goldberger <agoldberger@nvidia.com> |
| 7 | +commit 485d65e1357123a697c591a5aeb773994b247ad7 |
| 8 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 9 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 10 | +ciq/ciq_backports/kernel-5.14.0-427.40.1.el9_4/485d65e1.failed |
| 11 | + |
| 12 | +Prevent forced completion handling on an entry that has not yet been |
| 13 | +assigned an index, which causes an out-of-bounds access on idx = -22. |
| 14 | +Instead of waiting indefinitely for the semaphore, the blocking flow now |
| 15 | +waits for an index to be allocated, or for semaphore acquisition to time |
| 16 | +out, before starting the timer for FW completion. |
| 17 | + |
| 18 | +Kernel log example: |
| 19 | +mlx5_core 0000:06:00.0: wait_func_handle_exec_timeout:1128:(pid 185911): cmd[-22]: CREATE_UCTX(0xa04) No done completion |
| 20 | + |
| 21 | +Fixes: 8e715cd613a1 ("net/mlx5: Set command entry semaphore up once got index free") |
| 22 | + Signed-off-by: Akiva Goldberger <agoldberger@nvidia.com> |
| 23 | + Reviewed-by: Moshe Shemesh <moshe@nvidia.com> |
| 24 | + Signed-off-by: Tariq Toukan <tariqt@nvidia.com> |
| 25 | +Link: https://lore.kernel.org/r/20240509112951.590184-5-tariqt@nvidia.com |
| 26 | + Signed-off-by: Jakub Kicinski <kuba@kernel.org> |
| 27 | +(cherry picked from commit 485d65e1357123a697c591a5aeb773994b247ad7) |
| 28 | + Signed-off-by: Jonathan Maple <jmaple@ciq.com> |
| 29 | + |
| 30 | +# Conflicts: |
| 31 | +# drivers/net/ethernet/mellanox/mlx5/core/cmd.c |
| 32 | +diff --cc drivers/net/ethernet/mellanox/mlx5/core/cmd.c |
| 33 | +index d532883b42d7,511e7fee39ac..000000000000 |
| 34 | +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c |
| 35 | ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c |
| 36 | +@@@ -970,14 -974,28 +969,35 @@@ static void cmd_work_handler(struct wor |
| 37 | + int alloc_ret; |
| 38 | + int cmd_mode; |
| 39 | + |
| 40 | +- dev = container_of(cmd, struct mlx5_core_dev, cmd); |
| 41 | +- cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); |
| 42 | +- |
| 43 | + complete(&ent->handling); |
| 44 | +++<<<<<<< HEAD |
| 45 | + + sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; |
| 46 | + + down(sem); |
| 47 | + + if (!ent->page_queue) { |
| 48 | + + alloc_ret = cmd_alloc_index(cmd); |
| 49 | +++======= |
| 50 | ++ |
| 51 | ++ dev = container_of(cmd, struct mlx5_core_dev, cmd); |
| 52 | ++ timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD)); |
| 53 | ++ |
| 54 | ++ if (!ent->page_queue) { |
| 55 | ++ if (down_timeout(&cmd->vars.sem, timeout)) { |
| 56 | ++ mlx5_core_warn(dev, "%s(0x%x) timed out while waiting for a slot.\n", |
| 57 | ++ mlx5_command_str(ent->op), ent->op); |
| 58 | ++ if (ent->callback) { |
| 59 | ++ ent->callback(-EBUSY, ent->context); |
| 60 | ++ mlx5_free_cmd_msg(dev, ent->out); |
| 61 | ++ free_msg(dev, ent->in); |
| 62 | ++ cmd_ent_put(ent); |
| 63 | ++ } else { |
| 64 | ++ ent->ret = -EBUSY; |
| 65 | ++ complete(&ent->done); |
| 66 | ++ } |
| 67 | ++ complete(&ent->slotted); |
| 68 | ++ return; |
| 69 | ++ } |
| 70 | ++ alloc_ret = cmd_alloc_index(cmd, ent); |
| 71 | +++>>>>>>> 485d65e13571 (net/mlx5: Add a timeout to acquire the command queue semaphore) |
| 72 | + if (alloc_ret < 0) { |
| 73 | + mlx5_core_err_rl(dev, "failed to allocate command entry\n"); |
| 74 | + if (ent->callback) { |
| 75 | +@@@ -989,18 -1007,20 +1009,28 @@@ |
| 76 | + ent->ret = -EAGAIN; |
| 77 | + complete(&ent->done); |
| 78 | + } |
| 79 | +- up(sem); |
| 80 | ++ up(&cmd->vars.sem); |
| 81 | + return; |
| 82 | + } |
| 83 | + + ent->idx = alloc_ret; |
| 84 | + } else { |
| 85 | +++<<<<<<< HEAD |
| 86 | + + ent->idx = cmd->max_reg_cmds; |
| 87 | +++======= |
| 88 | ++ down(&cmd->vars.pages_sem); |
| 89 | ++ ent->idx = cmd->vars.max_reg_cmds; |
| 90 | +++>>>>>>> 485d65e13571 (net/mlx5: Add a timeout to acquire the command queue semaphore) |
| 91 | + spin_lock_irqsave(&cmd->alloc_lock, flags); |
| 92 | + - clear_bit(ent->idx, &cmd->vars.bitmask); |
| 93 | + - cmd->ent_arr[ent->idx] = ent; |
| 94 | + + clear_bit(ent->idx, &cmd->bitmask); |
| 95 | + spin_unlock_irqrestore(&cmd->alloc_lock, flags); |
| 96 | + } |
| 97 | + |
| 98 | +++<<<<<<< HEAD |
| 99 | + + cmd->ent_arr[ent->idx] = ent; |
| 100 | +++======= |
| 101 | ++ complete(&ent->slotted); |
| 102 | ++ |
| 103 | +++>>>>>>> 485d65e13571 (net/mlx5: Add a timeout to acquire the command queue semaphore) |
| 104 | + lay = get_inst(cmd, ent->idx); |
| 105 | + ent->lay = lay; |
| 106 | + memset(lay, 0, sizeof(*lay)); |
| 107 | +* Unmerged path drivers/net/ethernet/mellanox/mlx5/core/cmd.c |
| 108 | +diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h |
| 109 | +index 4b9626cd83e4..22fc69e3c0bc 100644 |
| 110 | +--- a/include/linux/mlx5/driver.h |
| 111 | ++++ b/include/linux/mlx5/driver.h |
| 112 | +@@ -851,6 +851,7 @@ struct mlx5_cmd_work_ent { |
| 113 | + void *context; |
| 114 | + int idx; |
| 115 | + struct completion handling; |
| 116 | ++ struct completion slotted; |
| 117 | + struct completion done; |
| 118 | + struct mlx5_cmd *cmd; |
| 119 | + struct work_struct work; |
0 commit comments