Skip to content

Commit 330f0f6

Browse files
cjubran authored and kuba-moo committed
net/mlx5: Remove default QoS group and attach vports directly to root TSAR
Currently, the driver creates a default group (`node0`) and attaches all vports to it unless the user explicitly sets a parent group. As a result, when a user configures tx_share on a group and tx_share on a VF, the expectation is for the group and the VF to share bandwidth relatively. However, since the VF is not connected to the same parent (but to the default node), the proportional share logic is not applied correctly. To fix this, remove the default group (`node0`) and instead connect vports directly to the root TSAR when no parent is specified. This ensures that vports and groups share the same root scheduler and their tx_share values are compared directly under the same hierarchy. Fixes: 0fe132e ("net/mlx5: E-switch, Allow to add vports to rate groups") Signed-off-by: Carolina Jubran <cjubran@nvidia.com> Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com> Signed-off-by: Mark Bloch <mbloch@nvidia.com> Link: https://patch.msgid.link/20250820133209.389065-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent bc17455 commit 330f0f6

File tree

2 files changed

+33
-69
lines changed

2 files changed

+33
-69
lines changed

drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c

Lines changed: 33 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,7 @@ static int
462462
esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
463463
struct netlink_ext_ack *extack)
464464
{
465+
struct mlx5_esw_sched_node *parent = vport_node->parent;
465466
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
466467
struct mlx5_core_dev *dev = vport_node->esw->dev;
467468
void *attr;
@@ -477,7 +478,7 @@ esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
477478
attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
478479
MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
479480
MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
480-
vport_node->parent->ix);
481+
parent ? parent->ix : vport_node->esw->qos.root_tsar_ix);
481482
MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
482483
vport_node->max_rate);
483484

@@ -786,48 +787,15 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
786787
return err;
787788
}
788789

789-
if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
790-
esw->qos.node0 = __esw_qos_create_vports_sched_node(esw, NULL, extack);
791-
} else {
792-
/* The eswitch doesn't support scheduling nodes.
793-
* Create a software-only node0 using the root TSAR to attach vport QoS to.
794-
*/
795-
if (!__esw_qos_alloc_node(esw,
796-
esw->qos.root_tsar_ix,
797-
SCHED_NODE_TYPE_VPORTS_TSAR,
798-
NULL))
799-
esw->qos.node0 = ERR_PTR(-ENOMEM);
800-
else
801-
list_add_tail(&esw->qos.node0->entry,
802-
&esw->qos.domain->nodes);
803-
}
804-
if (IS_ERR(esw->qos.node0)) {
805-
err = PTR_ERR(esw->qos.node0);
806-
esw_warn(dev, "E-Switch create rate node 0 failed (%d)\n", err);
807-
goto err_node0;
808-
}
809790
refcount_set(&esw->qos.refcnt, 1);
810791

811792
return 0;
812-
813-
err_node0:
814-
if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
815-
esw->qos.root_tsar_ix))
816-
esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
817-
818-
return err;
819793
}
820794

821795
static void esw_qos_destroy(struct mlx5_eswitch *esw)
822796
{
823797
int err;
824798

825-
if (esw->qos.node0->ix != esw->qos.root_tsar_ix)
826-
__esw_qos_destroy_node(esw->qos.node0, NULL);
827-
else
828-
__esw_qos_free_node(esw->qos.node0);
829-
esw->qos.node0 = NULL;
830-
831799
err = mlx5_destroy_scheduling_element_cmd(esw->dev,
832800
SCHEDULING_HIERARCHY_E_SWITCH,
833801
esw->qos.root_tsar_ix);
@@ -990,13 +958,16 @@ esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type,
990958
struct netlink_ext_ack *extack)
991959
{
992960
struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
993-
int err, new_level, max_level;
961+
struct mlx5_esw_sched_node *parent = vport_node->parent;
962+
int err;
994963

995964
if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
965+
int new_level, max_level;
966+
996967
/* Increase the parent's level by 2 to account for both the
997968
* TC arbiter and the vports TC scheduling element.
998969
*/
999-
new_level = vport_node->parent->level + 2;
970+
new_level = (parent ? parent->level : 2) + 2;
1000971
max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev,
1001972
log_esw_max_sched_depth);
1002973
if (new_level > max_level) {
@@ -1033,9 +1004,7 @@ esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type,
10331004
err_sched_nodes:
10341005
if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
10351006
esw_qos_node_destroy_sched_element(vport_node, NULL);
1036-
list_add_tail(&vport_node->entry,
1037-
&vport_node->parent->children);
1038-
vport_node->level = vport_node->parent->level + 1;
1007+
esw_qos_node_attach_to_parent(vport_node);
10391008
} else {
10401009
esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL);
10411010
}
@@ -1083,7 +1052,6 @@ static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport,
10831052
static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
10841053
{
10851054
struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1086-
struct mlx5_esw_sched_node *parent = vport_node->parent;
10871055
enum sched_node_type curr_type = vport_node->type;
10881056

10891057
if (curr_type == SCHED_NODE_TYPE_VPORT)
@@ -1093,7 +1061,7 @@ static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_a
10931061

10941062
vport_node->bw_share = 0;
10951063
list_del_init(&vport_node->entry);
1096-
esw_qos_normalize_min_rate(parent->esw, parent, extack);
1064+
esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack);
10971065

10981066
trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
10991067
}
@@ -1103,25 +1071,23 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport,
11031071
struct mlx5_esw_sched_node *parent,
11041072
struct netlink_ext_ack *extack)
11051073
{
1074+
struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
11061075
int err;
11071076

11081077
esw_assert_qos_lock_held(vport->dev->priv.eswitch);
11091078

1110-
esw_qos_node_set_parent(vport->qos.sched_node, parent);
1111-
if (type == SCHED_NODE_TYPE_VPORT) {
1112-
err = esw_qos_vport_create_sched_element(vport->qos.sched_node,
1113-
extack);
1114-
} else {
1079+
esw_qos_node_set_parent(vport_node, parent);
1080+
if (type == SCHED_NODE_TYPE_VPORT)
1081+
err = esw_qos_vport_create_sched_element(vport_node, extack);
1082+
else
11151083
err = esw_qos_vport_tc_enable(vport, type, extack);
1116-
}
11171084
if (err)
11181085
return err;
11191086

1120-
vport->qos.sched_node->type = type;
1121-
esw_qos_normalize_min_rate(parent->esw, parent, extack);
1122-
trace_mlx5_esw_vport_qos_create(vport->dev, vport,
1123-
vport->qos.sched_node->max_rate,
1124-
vport->qos.sched_node->bw_share);
1087+
vport_node->type = type;
1088+
esw_qos_normalize_min_rate(vport_node->esw, parent, extack);
1089+
trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate,
1090+
vport_node->bw_share);
11251091

11261092
return 0;
11271093
}
@@ -1132,17 +1098,20 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t
11321098
{
11331099
struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
11341100
struct mlx5_esw_sched_node *sched_node;
1101+
struct mlx5_eswitch *parent_esw;
11351102
int err;
11361103

11371104
esw_assert_qos_lock_held(esw);
11381105
err = esw_qos_get(esw, extack);
11391106
if (err)
11401107
return err;
11411108

1142-
parent = parent ?: esw->qos.node0;
1143-
sched_node = __esw_qos_alloc_node(parent->esw, 0, type, parent);
1109+
parent_esw = parent ? parent->esw : esw;
1110+
sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent);
11441111
if (!sched_node)
11451112
return -ENOMEM;
1113+
if (!parent)
1114+
list_add_tail(&sched_node->entry, &esw->qos.domain->nodes);
11461115

11471116
sched_node->max_rate = max_rate;
11481117
sched_node->min_rate = min_rate;
@@ -1168,7 +1137,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
11681137
goto unlock;
11691138

11701139
parent = vport->qos.sched_node->parent;
1171-
WARN(parent != esw->qos.node0, "Disabling QoS on port before detaching it from node");
1140+
WARN(parent, "Disabling QoS on port before detaching it from node");
11721141

11731142
esw_qos_vport_disable(vport, NULL);
11741143
mlx5_esw_qos_vport_qos_free(vport);
@@ -1268,7 +1237,6 @@ static int esw_qos_vport_update(struct mlx5_vport *vport,
12681237
int err;
12691238

12701239
esw_assert_qos_lock_held(vport->dev->priv.eswitch);
1271-
parent = parent ?: curr_parent;
12721240
if (curr_type == type && curr_parent == parent)
12731241
return 0;
12741242

@@ -1306,16 +1274,16 @@ static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw
13061274

13071275
esw_assert_qos_lock_held(esw);
13081276
curr_parent = vport->qos.sched_node->parent;
1309-
parent = parent ?: esw->qos.node0;
13101277
if (curr_parent == parent)
13111278
return 0;
13121279

13131280
/* Set vport QoS type based on parent node type if different from
13141281
* default QoS; otherwise, use the vport's current QoS type.
13151282
*/
1316-
if (parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1283+
if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
13171284
type = SCHED_NODE_TYPE_RATE_LIMITER;
1318-
else if (curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1285+
else if (curr_parent &&
1286+
curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
13191287
type = SCHED_NODE_TYPE_VPORT;
13201288
else
13211289
type = vport->qos.sched_node->type;
@@ -1654,9 +1622,10 @@ static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
16541622
static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
16551623
u32 *tc_bw)
16561624
{
1657-
struct mlx5_eswitch *esw = vport->qos.sched_node ?
1658-
vport->qos.sched_node->parent->esw :
1659-
vport->dev->priv.eswitch;
1625+
struct mlx5_esw_sched_node *node = vport->qos.sched_node;
1626+
struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1627+
1628+
esw = (node && node->parent) ? node->parent->esw : esw;
16601629

16611630
return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
16621631
}
@@ -1763,7 +1732,7 @@ int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf,
17631732
if (disable) {
17641733
if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
17651734
err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT,
1766-
NULL, extack);
1735+
vport_node->parent, extack);
17671736
goto unlock;
17681737
}
17691738

@@ -1775,7 +1744,7 @@ int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf,
17751744
} else {
17761745
err = esw_qos_vport_update(vport,
17771746
SCHED_NODE_TYPE_TC_ARBITER_TSAR,
1778-
NULL, extack);
1747+
vport_node->parent, extack);
17791748
}
17801749
if (!err)
17811750
esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack);

drivers/net/ethernet/mellanox/mlx5/core/eswitch.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -373,11 +373,6 @@ struct mlx5_eswitch {
373373
refcount_t refcnt;
374374
u32 root_tsar_ix;
375375
struct mlx5_qos_domain *domain;
376-
/* Contains all vports with QoS enabled but no explicit node.
377-
* Cannot be NULL if QoS is enabled, but may be a fake node
378-
* referencing the root TSAR if the esw doesn't support nodes.
379-
*/
380-
struct mlx5_esw_sched_node *node0;
381376
} qos;
382377

383378
struct mlx5_esw_bridge_offloads *br_offloads;

0 commit comments

Comments (0)