Skip to content

Commit 97fa5bf

Browse files
committed
Merge: CVE-2024-38556 kernel: net/mlx5: Add a timeout to acquire the command queue semaphore
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/4665
JIRA: https://issues.redhat.com/browse/RHEL-44225
CVE: CVE-2024-38556

```
commit 485d65e
Author: Akiva Goldberger <agoldberger@nvidia.com>
Date: Thu May 9 14:29:50 2024 +0300

net/mlx5: Add a timeout to acquire the command queue semaphore

Prevent forced completion handling on an entry that has not yet been
assigned an index, causing an out of bounds access on idx = -22.
Instead of waiting indefinitely for the sem, blocking flow now waits for
index to be allocated or a sem acquisition timeout before beginning the
timer for FW completion.

Kernel log example:
mlx5_core 0000:06:00.0: wait_func_handle_exec_timeout:1128:(pid 185911): cmd[-22]: CREATE_UCTX(0xa04) No done completion

Fixes: 8e715cd ("net/mlx5: Set command entry semaphore up once got index free")
Signed-off-by: Akiva Goldberger <agoldberger@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240509112951.590184-5-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
```

Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
Approved-by: Kamal Heib <kheib@redhat.com>
Approved-by: José Ignacio Tornos Martínez <jtornosm@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Lucas Zampieri <lzampier@redhat.com>
2 parents 811e21c + dc7af7d commit 97fa5bf

File tree

2 files changed

+33
-9
lines changed
  • drivers/net/ethernet/mellanox/mlx5/core
  • include/linux/mlx5

2 files changed

+33
-9
lines changed

drivers/net/ethernet/mellanox/mlx5/core/cmd.c

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -966,19 +966,32 @@ static void cmd_work_handler(struct work_struct *work)
966966
bool poll_cmd = ent->polling;
967967
struct mlx5_cmd_layout *lay;
968968
struct mlx5_core_dev *dev;
969-
unsigned long cb_timeout;
970-
struct semaphore *sem;
969+
unsigned long timeout;
971970
unsigned long flags;
972971
int alloc_ret;
973972
int cmd_mode;
974973

974+
complete(&ent->handling);
975+
975976
dev = container_of(cmd, struct mlx5_core_dev, cmd);
976-
cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
977+
timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
977978

978-
complete(&ent->handling);
979-
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
980-
down(sem);
981979
if (!ent->page_queue) {
980+
if (down_timeout(&cmd->sem, timeout)) {
981+
mlx5_core_warn(dev, "%s(0x%x) timed out while waiting for a slot.\n",
982+
mlx5_command_str(ent->op), ent->op);
983+
if (ent->callback) {
984+
ent->callback(-EBUSY, ent->context);
985+
mlx5_free_cmd_msg(dev, ent->out);
986+
free_msg(dev, ent->in);
987+
cmd_ent_put(ent);
988+
} else {
989+
ent->ret = -EBUSY;
990+
complete(&ent->done);
991+
}
992+
complete(&ent->slotted);
993+
return;
994+
}
982995
alloc_ret = cmd_alloc_index(cmd);
983996
if (alloc_ret < 0) {
984997
mlx5_core_err_rl(dev, "failed to allocate command entry\n");
@@ -991,17 +1004,20 @@ static void cmd_work_handler(struct work_struct *work)
9911004
ent->ret = -EAGAIN;
9921005
complete(&ent->done);
9931006
}
994-
up(sem);
1007+
up(&cmd->sem);
9951008
return;
9961009
}
9971010
ent->idx = alloc_ret;
9981011
} else {
1012+
down(&cmd->pages_sem);
9991013
ent->idx = cmd->max_reg_cmds;
10001014
spin_lock_irqsave(&cmd->alloc_lock, flags);
10011015
clear_bit(ent->idx, &cmd->bitmask);
10021016
spin_unlock_irqrestore(&cmd->alloc_lock, flags);
10031017
}
10041018

1019+
complete(&ent->slotted);
1020+
10051021
cmd->ent_arr[ent->idx] = ent;
10061022
lay = get_inst(cmd, ent->idx);
10071023
ent->lay = lay;
@@ -1021,7 +1037,7 @@ static void cmd_work_handler(struct work_struct *work)
10211037
ent->ts1 = ktime_get_ns();
10221038
cmd_mode = cmd->mode;
10231039

1024-
if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
1040+
if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, timeout))
10251041
cmd_ent_get(ent);
10261042
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
10271043

@@ -1141,6 +1157,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
11411157
ent->ret = -ECANCELED;
11421158
goto out_err;
11431159
}
1160+
1161+
wait_for_completion(&ent->slotted);
1162+
11441163
if (cmd->mode == CMD_MODE_POLLING || ent->polling)
11451164
wait_for_completion(&ent->done);
11461165
else if (!wait_for_completion_timeout(&ent->done, timeout))
@@ -1155,6 +1174,9 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
11551174
} else if (err == -ECANCELED) {
11561175
mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
11571176
mlx5_command_str(ent->op), ent->op);
1177+
} else if (err == -EBUSY) {
1178+
mlx5_core_warn(dev, "%s(0x%x) timeout while waiting for command semaphore.\n",
1179+
mlx5_command_str(ent->op), ent->op);
11581180
}
11591181
mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
11601182
err, deliv_status_to_str(ent->status), ent->status);
@@ -1206,6 +1228,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
12061228
ent->polling = force_polling;
12071229

12081230
init_completion(&ent->handling);
1231+
init_completion(&ent->slotted);
12091232
if (!callback)
12101233
init_completion(&ent->done);
12111234

@@ -1223,7 +1246,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
12231246
return 0; /* mlx5_cmd_comp_handler() will put(ent) */
12241247

12251248
err = wait_func(dev, ent);
1226-
if (err == -ETIMEDOUT || err == -ECANCELED)
1249+
if (err == -ETIMEDOUT || err == -ECANCELED || err == -EBUSY)
12271250
goto out_free;
12281251

12291252
ds = ent->ts2 - ent->ts1;

include/linux/mlx5/driver.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,7 @@ struct mlx5_cmd_work_ent {
851851
void *context;
852852
int idx;
853853
struct completion handling;
854+
struct completion slotted;
854855
struct completion done;
855856
struct mlx5_cmd *cmd;
856857
struct work_struct work;

0 commit comments

Comments
 (0)