|
| 1 | +net/mlx5e: Fix netif state handling |
| 2 | + |
| 3 | +jira LE-1907 |
| 4 | +cve CVE-2024-38608 |
| 5 | +Rebuild_History Non-Buildable kernel-5.14.0-427.33.1.el9_4 |
| 6 | +commit-author Shay Drory <shayd@nvidia.com> |
| 7 | +commit 3d5918477f94e4c2f064567875c475468e264644 |
| 8 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 9 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 10 | +ciq/ciq_backports/kernel-5.14.0-427.33.1.el9_4/3d591847.failed |
| 11 | + |
| 12 | +mlx5e_suspend cleans resources only if netif_device_present() returns |
| 13 | +true. However, mlx5e_resume changes the state of netif, via |
| 14 | +mlx5e_nic_enable, only if reg_state == NETREG_REGISTERED. |
| 15 | +In the case below, this asymmetry leads to a NULL-ptr Oops [1] and memory |
| 16 | +leaks: |
| 17 | + |
| 18 | +mlx5e_probe |
| 19 | + _mlx5e_resume |
| 20 | + mlx5e_attach_netdev |
| 21 | + mlx5e_nic_enable <-- netdev not registered, so netif_device_attach() is not called |
| 22 | + register_netdev <-- failed for some reason. |
| 23 | +ERROR_FLOW: |
| 24 | + _mlx5e_suspend <-- netif_device_present() returns false, resources aren't freed :( |
| 25 | + |
| 26 | +Hence, clean resources in this case as well. |
| 27 | + |
| 28 | +[1] |
| 29 | +BUG: kernel NULL pointer dereference, address: 0000000000000000 |
| 30 | +PGD 0 P4D 0 |
| 31 | +Oops: 0010 [#1] SMP |
| 32 | +CPU: 2 PID: 9345 Comm: test-ovs-ct-gen Not tainted 6.5.0_for_upstream_min_debug_2023_09_05_16_01 #1 |
| 33 | +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 |
| 34 | +RIP: 0010:0x0 |
| 35 | +Code: Unable to access opcode bytes at 0xffffffffffffffd6. |
| 36 | +RSP: 0018:ffff888178aaf758 EFLAGS: 00010246 |
| 37 | +Call Trace: |
| 38 | + <TASK> |
| 39 | + ? __die+0x20/0x60 |
| 40 | + ? page_fault_oops+0x14c/0x3c0 |
| 41 | + ? exc_page_fault+0x75/0x140 |
| 42 | + ? asm_exc_page_fault+0x22/0x30 |
| 43 | + notifier_call_chain+0x35/0xb0 |
| 44 | + blocking_notifier_call_chain+0x3d/0x60 |
| 45 | + mlx5_blocking_notifier_call_chain+0x22/0x30 [mlx5_core] |
| 46 | + mlx5_core_uplink_netdev_event_replay+0x3e/0x60 [mlx5_core] |
| 47 | + mlx5_mdev_netdev_track+0x53/0x60 [mlx5_ib] |
| 48 | + mlx5_ib_roce_init+0xc3/0x340 [mlx5_ib] |
| 49 | + __mlx5_ib_add+0x34/0xd0 [mlx5_ib] |
| 50 | + mlx5r_probe+0xe1/0x210 [mlx5_ib] |
| 51 | + ? auxiliary_match_id+0x6a/0x90 |
| 52 | + auxiliary_bus_probe+0x38/0x80 |
| 53 | + ? driver_sysfs_add+0x51/0x80 |
| 54 | + really_probe+0xc9/0x3e0 |
| 55 | + ? driver_probe_device+0x90/0x90 |
| 56 | + __driver_probe_device+0x80/0x160 |
| 57 | + driver_probe_device+0x1e/0x90 |
| 58 | + __device_attach_driver+0x7d/0x100 |
| 59 | + bus_for_each_drv+0x80/0xd0 |
| 60 | + __device_attach+0xbc/0x1f0 |
| 61 | + bus_probe_device+0x86/0xa0 |
| 62 | + device_add+0x637/0x840 |
| 63 | + __auxiliary_device_add+0x3b/0xa0 |
| 64 | + add_adev+0xc9/0x140 [mlx5_core] |
| 65 | + mlx5_rescan_drivers_locked+0x22a/0x310 [mlx5_core] |
| 66 | + mlx5_register_device+0x53/0xa0 [mlx5_core] |
| 67 | + mlx5_init_one_devl_locked+0x5c4/0x9c0 [mlx5_core] |
| 68 | + mlx5_init_one+0x3b/0x60 [mlx5_core] |
| 69 | + probe_one+0x44c/0x730 [mlx5_core] |
| 70 | + local_pci_probe+0x3e/0x90 |
| 71 | + pci_device_probe+0xbf/0x210 |
| 72 | + ? kernfs_create_link+0x5d/0xa0 |
| 73 | + ? sysfs_do_create_link_sd+0x60/0xc0 |
| 74 | + really_probe+0xc9/0x3e0 |
| 75 | + ? driver_probe_device+0x90/0x90 |
| 76 | + __driver_probe_device+0x80/0x160 |
| 77 | + driver_probe_device+0x1e/0x90 |
| 78 | + __device_attach_driver+0x7d/0x100 |
| 79 | + bus_for_each_drv+0x80/0xd0 |
| 80 | + __device_attach+0xbc/0x1f0 |
| 81 | + pci_bus_add_device+0x54/0x80 |
| 82 | + pci_iov_add_virtfn+0x2e6/0x320 |
| 83 | + sriov_enable+0x208/0x420 |
| 84 | + mlx5_core_sriov_configure+0x9e/0x200 [mlx5_core] |
| 85 | + sriov_numvfs_store+0xae/0x1a0 |
| 86 | + kernfs_fop_write_iter+0x10c/0x1a0 |
| 87 | + vfs_write+0x291/0x3c0 |
| 88 | + ksys_write+0x5f/0xe0 |
| 89 | + do_syscall_64+0x3d/0x90 |
| 90 | + entry_SYSCALL_64_after_hwframe+0x46/0xb0 |
| 91 | + CR2: 0000000000000000 |
| 92 | + ---[ end trace 0000000000000000 ]--- |
| 93 | + |
| 94 | +Fixes: 2c3b5beec46a ("net/mlx5e: More generic netdev management API") |
| 95 | + Signed-off-by: Shay Drory <shayd@nvidia.com> |
| 96 | + Signed-off-by: Tariq Toukan <tariqt@nvidia.com> |
| 97 | + Reviewed-by: Simon Horman <horms@kernel.org> |
| 98 | +Link: https://lore.kernel.org/r/20240509112951.590184-2-tariqt@nvidia.com |
| 99 | + Signed-off-by: Jakub Kicinski <kuba@kernel.org> |
| 100 | +(cherry picked from commit 3d5918477f94e4c2f064567875c475468e264644) |
| 101 | + Signed-off-by: Jonathan Maple <jmaple@ciq.com> |
| 102 | + |
| 103 | +# Conflicts: |
| 104 | +# drivers/net/ethernet/mellanox/mlx5/core/en_main.c |
| 105 | +diff --cc drivers/net/ethernet/mellanox/mlx5/core/en_main.c |
| 106 | +index 53343da28517,64497b6eebd3..000000000000 |
| 107 | +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c |
| 108 | ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c |
| 109 | +@@@ -6011,10 -6064,13 +6011,10 @@@ static int _mlx5e_suspend(struct auxili |
| 110 | + struct mlx5e_priv *priv = mlx5e_dev->priv; |
| 111 | + struct net_device *netdev = priv->netdev; |
| 112 | + struct mlx5_core_dev *mdev = priv->mdev; |
| 113 | + - struct mlx5_core_dev *pos; |
| 114 | + - int i; |
| 115 | + |
| 116 | +- if (!netif_device_present(netdev)) { |
| 117 | ++ if (!pre_netdev_reg && !netif_device_present(netdev)) { |
| 118 | + if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) |
| 119 | + - mlx5_sd_for_each_dev(i, mdev, pos) |
| 120 | + - mlx5e_destroy_mdev_resources(pos); |
| 121 | + + mlx5e_destroy_mdev_resources(mdev); |
| 122 | + return -ENODEV; |
| 123 | + } |
| 124 | + |
| 125 | +@@@ -6025,7 -6083,17 +6025,21 @@@ |
| 126 | + |
| 127 | + static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) |
| 128 | + { |
| 129 | +++<<<<<<< HEAD |
| 130 | + + return _mlx5e_suspend(adev); |
| 131 | +++======= |
| 132 | ++ struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); |
| 133 | ++ struct mlx5_core_dev *mdev = edev->mdev; |
| 134 | ++ struct auxiliary_device *actual_adev; |
| 135 | ++ int err = 0; |
| 136 | ++ |
| 137 | ++ actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); |
| 138 | ++ if (actual_adev) |
| 139 | ++ err = _mlx5e_suspend(actual_adev, false); |
| 140 | ++ |
| 141 | ++ mlx5_sd_cleanup(mdev); |
| 142 | ++ return err; |
| 143 | +++>>>>>>> 3d5918477f94 (net/mlx5e: Fix netif state handling) |
| 144 | + } |
| 145 | + |
| 146 | + static int _mlx5e_probe(struct auxiliary_device *adev) |
| 147 | +@@@ -6104,18 -6172,32 +6118,18 @@@ err_devlink_unregister |
| 148 | + static int mlx5e_probe(struct auxiliary_device *adev, |
| 149 | + const struct auxiliary_device_id *id) |
| 150 | + { |
| 151 | + - struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); |
| 152 | + - struct mlx5_core_dev *mdev = edev->mdev; |
| 153 | + - struct auxiliary_device *actual_adev; |
| 154 | + - int err; |
| 155 | + - |
| 156 | + - err = mlx5_sd_init(mdev); |
| 157 | + - if (err) |
| 158 | + - return err; |
| 159 | + - |
| 160 | + - actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx); |
| 161 | + - if (actual_adev) |
| 162 | + - return _mlx5e_probe(actual_adev); |
| 163 | + - return 0; |
| 164 | + + return _mlx5e_probe(adev); |
| 165 | + } |
| 166 | + |
| 167 | + -static void _mlx5e_remove(struct auxiliary_device *adev) |
| 168 | + +static void mlx5e_remove(struct auxiliary_device *adev) |
| 169 | + { |
| 170 | + - struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); |
| 171 | + struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); |
| 172 | + struct mlx5e_priv *priv = mlx5e_dev->priv; |
| 173 | + - struct mlx5_core_dev *mdev = edev->mdev; |
| 174 | + |
| 175 | + - mlx5_core_uplink_netdev_set(mdev, NULL); |
| 176 | + + mlx5_core_uplink_netdev_set(priv->mdev, NULL); |
| 177 | + mlx5e_dcbnl_delete_app(priv); |
| 178 | + unregister_netdev(priv->netdev); |
| 179 | +- _mlx5e_suspend(adev); |
| 180 | ++ _mlx5e_suspend(adev, false); |
| 181 | + priv->profile->cleanup(priv); |
| 182 | + mlx5e_destroy_netdev(priv); |
| 183 | + mlx5e_devlink_port_unregister(mlx5e_dev); |
| 184 | +* Unmerged path drivers/net/ethernet/mellanox/mlx5/core/en_main.c |
0 commit comments