Skip to content

Commit 491ea1c

Browse files
committed
Merge: CVE-2024-38608 kernel: net/mlx5e: Fix netif state handling
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/4664

JIRA: https://issues.redhat.com/browse/RHEL-43870
CVE: CVE-2024-38608

```
commit 3d59184
Author: Shay Drory <shayd@nvidia.com>
Date: Thu May 9 14:29:47 2024 +0300

    net/mlx5e: Fix netif state handling

    mlx5e_suspend cleans resources only if netif_device_present() returns
    true. However, mlx5e_resume changes the state of netif, via
    mlx5e_nic_enable, only if reg_state == NETREG_REGISTERED.
    In the below case, the above leads to NULL-ptr Oops[1] and memory
    leaks:

    mlx5e_probe
     _mlx5e_resume
      mlx5e_attach_netdev
       mlx5e_nic_enable <-- netdev not reg, not calling netif_device_attach()
      register_netdev <-- failed for some reason.
    ERROR_FLOW:
     _mlx5e_suspend <-- netif_device_present return false, resources aren't freed :(

    Hence, clean resources in this case as well.

    [1]
    BUG: kernel NULL pointer dereference, address: 0000000000000000
    PGD 0 P4D 0
    Oops: 0010 [#1] SMP
    CPU: 2 PID: 9345 Comm: test-ovs-ct-gen Not tainted 6.5.0_for_upstream_min_debug_2023_09_05_16_01 #1
    Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
    RIP: 0010:0x0
    Code: Unable to access opcode bytes at0xffffffffffffffd6.
    RSP: 0018:ffff888178aaf758 EFLAGS: 00010246
    Call Trace:
     <TASK>
     ? __die+0x20/0x60
     ? page_fault_oops+0x14c/0x3c0
     ? exc_page_fault+0x75/0x140
     ? asm_exc_page_fault+0x22/0x30
     notifier_call_chain+0x35/0xb0
     blocking_notifier_call_chain+0x3d/0x60
     mlx5_blocking_notifier_call_chain+0x22/0x30 [mlx5_core]
     mlx5_core_uplink_netdev_event_replay+0x3e/0x60 [mlx5_core]
     mlx5_mdev_netdev_track+0x53/0x60 [mlx5_ib]
     mlx5_ib_roce_init+0xc3/0x340 [mlx5_ib]
     __mlx5_ib_add+0x34/0xd0 [mlx5_ib]
     mlx5r_probe+0xe1/0x210 [mlx5_ib]
     ? auxiliary_match_id+0x6a/0x90
     auxiliary_bus_probe+0x38/0x80
     ? driver_sysfs_add+0x51/0x80
     really_probe+0xc9/0x3e0
     ? driver_probe_device+0x90/0x90
     __driver_probe_device+0x80/0x160
     driver_probe_device+0x1e/0x90
     __device_attach_driver+0x7d/0x100
     bus_for_each_drv+0x80/0xd0
     __device_attach+0xbc/0x1f0
     bus_probe_device+0x86/0xa0
     device_add+0x637/0x840
     __auxiliary_device_add+0x3b/0xa0
     add_adev+0xc9/0x140 [mlx5_core]
     mlx5_rescan_drivers_locked+0x22a/0x310 [mlx5_core]
     mlx5_register_device+0x53/0xa0 [mlx5_core]
     mlx5_init_one_devl_locked+0x5c4/0x9c0 [mlx5_core]
     mlx5_init_one+0x3b/0x60 [mlx5_core]
     probe_one+0x44c/0x730 [mlx5_core]
     local_pci_probe+0x3e/0x90
     pci_device_probe+0xbf/0x210
     ? kernfs_create_link+0x5d/0xa0
     ? sysfs_do_create_link_sd+0x60/0xc0
     really_probe+0xc9/0x3e0
     ? driver_probe_device+0x90/0x90
     __driver_probe_device+0x80/0x160
     driver_probe_device+0x1e/0x90
     __device_attach_driver+0x7d/0x100
     bus_for_each_drv+0x80/0xd0
     __device_attach+0xbc/0x1f0
     pci_bus_add_device+0x54/0x80
     pci_iov_add_virtfn+0x2e6/0x320
     sriov_enable+0x208/0x420
     mlx5_core_sriov_configure+0x9e/0x200 [mlx5_core]
     sriov_numvfs_store+0xae/0x1a0
     kernfs_fop_write_iter+0x10c/0x1a0
     vfs_write+0x291/0x3c0
     ksys_write+0x5f/0xe0
     do_syscall_64+0x3d/0x90
     entry_SYSCALL_64_after_hwframe+0x46/0xb0
    CR2: 0000000000000000
    ---[ end trace 0000000000000000 ]---

    Fixes: 2c3b5be ("net/mlx5e: More generic netdev management API")
    Signed-off-by: Shay Drory <shayd@nvidia.com>
    Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
    Reviewed-by: Simon Horman <horms@kernel.org>
    Link: https://lore.kernel.org/r/20240509112951.590184-2-tariqt@nvidia.com
    Signed-off-by: Jakub Kicinski <kuba@kernel.org>
```

Depends on
b1a33e6 net/mlx5e: Add wrapping for auxiliary_driver ops and remove unused args (v6.8-rc1)
To avoid this dependency, the same changes to mlx5e_suspend() would have to be introduced.

Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>

Approved-by: Kamal Heib <kheib@redhat.com>
Approved-by: John B. Wyatt IV <jwyatt@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>

Merged-by: Lucas Zampieri <lzampier@redhat.com>
2 parents 867ceac + 6b31da8 commit 491ea1c

File tree

1 file changed

+16
-8
lines changed
  • drivers/net/ethernet/mellanox/mlx5/core

1 file changed

+16
-8
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6067,14 +6067,14 @@ static int mlx5e_resume(struct auxiliary_device *adev)
60676067
return 0;
60686068
}
60696069

6070-
static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
6070+
static int _mlx5e_suspend(struct auxiliary_device *adev, bool pre_netdev_reg)
60716071
{
60726072
struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev);
60736073
struct mlx5e_priv *priv = mlx5e_dev->priv;
60746074
struct net_device *netdev = priv->netdev;
60756075
struct mlx5_core_dev *mdev = priv->mdev;
60766076

6077-
if (!netif_device_present(netdev)) {
6077+
if (!pre_netdev_reg && !netif_device_present(netdev)) {
60786078
if (test_bit(MLX5E_STATE_DESTROYING, &priv->state))
60796079
mlx5e_destroy_mdev_resources(mdev);
60806080
return -ENODEV;
@@ -6085,15 +6085,18 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
60856085
return 0;
60866086
}
60876087

6088-
static int mlx5e_probe(struct auxiliary_device *adev,
6089-
const struct auxiliary_device_id *id)
6088+
static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
6089+
{
6090+
return _mlx5e_suspend(adev, false);
6091+
}
6092+
6093+
static int _mlx5e_probe(struct auxiliary_device *adev)
60906094
{
60916095
struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
60926096
const struct mlx5e_profile *profile = &mlx5e_nic_profile;
60936097
struct mlx5_core_dev *mdev = edev->mdev;
60946098
struct mlx5e_dev *mlx5e_dev;
60956099
struct net_device *netdev;
6096-
pm_message_t state = {};
60976100
struct mlx5e_priv *priv;
60986101
int err;
60996102

@@ -6148,7 +6151,7 @@ static int mlx5e_probe(struct auxiliary_device *adev,
61486151
return 0;
61496152

61506153
err_resume:
6151-
mlx5e_suspend(adev, state);
6154+
_mlx5e_suspend(adev, true);
61526155
err_profile_cleanup:
61536156
profile->cleanup(priv);
61546157
err_destroy_netdev:
@@ -6160,16 +6163,21 @@ static int mlx5e_probe(struct auxiliary_device *adev,
61606163
return err;
61616164
}
61626165

6166+
static int mlx5e_probe(struct auxiliary_device *adev,
6167+
const struct auxiliary_device_id *id)
6168+
{
6169+
return _mlx5e_probe(adev);
6170+
}
6171+
61636172
static void mlx5e_remove(struct auxiliary_device *adev)
61646173
{
61656174
struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev);
61666175
struct mlx5e_priv *priv = mlx5e_dev->priv;
6167-
pm_message_t state = {};
61686176

61696177
mlx5_core_uplink_netdev_set(priv->mdev, NULL);
61706178
mlx5e_dcbnl_delete_app(priv);
61716179
unregister_netdev(priv->netdev);
6172-
mlx5e_suspend(adev, state);
6180+
_mlx5e_suspend(adev, false);
61736181
priv->profile->cleanup(priv);
61746182
mlx5e_destroy_netdev(priv);
61756183
mlx5e_devlink_port_unregister(mlx5e_dev);

0 commit comments

Comments
 (0)