Skip to content

Commit 6cc04ea

Browse files
author
Benjamin Poirier
committed
RDMA/mlx5: Expose RDMA TRANSPORT flow table types to userspace
JIRA: https://issues.redhat.com/browse/RHEL-72227
JIRA: https://issues.redhat.com/browse/RHEL-73520
Upstream-status: v6.15-rc1

commit 74934dd
Author: Patrisious Haddad <phaddad@nvidia.com>
Date: Thu Mar 6 13:51:30 2025 +0200

    RDMA/mlx5: Expose RDMA TRANSPORT flow table types to userspace

    This patch adds RDMA_TRANSPORT_RX and RDMA_TRANSPORT_TX as a new flow table type for matcher creation.

    Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
    Reviewed-by: Mark Bloch <mbloch@nvidia.com>
    Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
    Link: https://patch.msgid.link/2287d8c50483e880450c7e8e08d9de34cdec1b14.1741261611.git.leon@kernel.org
    Signed-off-by: Leon Romanovsky <leon@kernel.org>

Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
1 parent 592ebb3 commit 6cc04ea

File tree

5 files changed

+149
-13
lines changed

5 files changed

+149
-13
lines changed

drivers/infiniband/hw/mlx5/fs.c

Lines changed: 141 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <rdma/mlx5_user_ioctl_verbs.h>
1313
#include <rdma/ib_hdrs.h>
1414
#include <rdma/ib_umem.h>
15+
#include <rdma/ib_ucaps.h>
1516
#include <linux/mlx5/driver.h>
1617
#include <linux/mlx5/fs.h>
1718
#include <linux/mlx5/fs_helpers.h>
@@ -690,14 +691,15 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
690691
struct mlx5_ib_flow_prio *prio,
691692
int priority,
692693
int num_entries, int num_groups,
693-
u32 flags)
694+
u32 flags, u16 vport)
694695
{
695696
struct mlx5_flow_table_attr ft_attr = {};
696697
struct mlx5_flow_table *ft;
697698

698699
ft_attr.prio = priority;
699700
ft_attr.max_fte = num_entries;
700701
ft_attr.flags = flags;
702+
ft_attr.vport = vport;
701703
ft_attr.autogroup.max_num_groups = num_groups;
702704
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
703705
if (IS_ERR(ft))
@@ -792,7 +794,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
792794
ft = prio->flow_table;
793795
if (!ft)
794796
return _get_prio(dev, ns, prio, priority, max_table_size,
795-
num_groups, flags);
797+
num_groups, flags, 0);
796798

797799
return prio;
798800
}
@@ -935,7 +937,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
935937
prio = &dev->flow_db->opfcs[type];
936938
if (!prio->flow_table) {
937939
prio = _get_prio(dev, ns, prio, priority,
938-
dev->num_ports * MAX_OPFC_RULES, 1, 0);
940+
dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
939941
if (IS_ERR(prio)) {
940942
err = PTR_ERR(prio);
941943
goto free;
@@ -1413,17 +1415,51 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
14131415
return ERR_PTR(err);
14141416
}
14151417

1418+
static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
1419+
enum mlx5_flow_namespace_type type,
1420+
u32 *flags, u16 *vport_idx,
1421+
u16 *vport,
1422+
struct mlx5_core_dev **ft_mdev,
1423+
u32 ib_port)
1424+
{
1425+
struct mlx5_core_dev *esw_mdev;
1426+
1427+
if (!is_mdev_switchdev_mode(dev->mdev))
1428+
return 0;
1429+
1430+
if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
1431+
return -EOPNOTSUPP;
1432+
1433+
if (!dev->port[ib_port - 1].rep)
1434+
return -EINVAL;
1435+
1436+
esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
1437+
if (esw_mdev != dev->mdev)
1438+
return -EOPNOTSUPP;
1439+
1440+
*flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
1441+
*ft_mdev = esw_mdev;
1442+
*vport = dev->port[ib_port - 1].rep->vport;
1443+
*vport_idx = dev->port[ib_port - 1].rep->vport_index;
1444+
1445+
return 0;
1446+
}
1447+
14161448
static struct mlx5_ib_flow_prio *
14171449
_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
14181450
enum mlx5_flow_namespace_type ns_type,
1419-
bool mcast)
1451+
bool mcast, u32 ib_port)
14201452
{
1453+
struct mlx5_core_dev *ft_mdev = dev->mdev;
14211454
struct mlx5_flow_namespace *ns = NULL;
14221455
struct mlx5_ib_flow_prio *prio = NULL;
14231456
int max_table_size = 0;
1457+
u16 vport_idx = 0;
14241458
bool esw_encap;
14251459
u32 flags = 0;
1460+
u16 vport = 0;
14261461
int priority;
1462+
int ret;
14271463

14281464
if (mcast)
14291465
priority = MLX5_IB_FLOW_MCAST_PRIO;
@@ -1471,13 +1507,38 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
14711507
MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
14721508
priority = user_priority;
14731509
break;
1510+
case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
1511+
case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
1512+
if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
1513+
return ERR_PTR(-EINVAL);
1514+
ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
1515+
&vport_idx, &vport,
1516+
&ft_mdev, ib_port);
1517+
if (ret)
1518+
return ERR_PTR(ret);
1519+
1520+
if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
1521+
max_table_size =
1522+
BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
1523+
ft_mdev, log_max_ft_size));
1524+
else
1525+
max_table_size =
1526+
BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
1527+
ft_mdev, log_max_ft_size));
1528+
priority = user_priority;
1529+
break;
14741530
default:
14751531
break;
14761532
}
14771533

14781534
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
14791535

1480-
ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
1536+
if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
1537+
ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
1538+
ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
1539+
else
1540+
ns = mlx5_get_flow_namespace(ft_mdev, ns_type);
1541+
14811542
if (!ns)
14821543
return ERR_PTR(-EOPNOTSUPP);
14831544

@@ -1497,6 +1558,12 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
14971558
case MLX5_FLOW_NAMESPACE_RDMA_TX:
14981559
prio = &dev->flow_db->rdma_tx[priority];
14991560
break;
1561+
case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
1562+
prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
1563+
break;
1564+
case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
1565+
prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
1566+
break;
15001567
default: return ERR_PTR(-EINVAL);
15011568
}
15021569

@@ -1507,7 +1574,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
15071574
return prio;
15081575

15091576
return _get_prio(dev, ns, prio, priority, max_table_size,
1510-
MLX5_FS_MAX_TYPES, flags);
1577+
MLX5_FS_MAX_TYPES, flags, vport);
15111578
}
15121579

15131580
static struct mlx5_ib_flow_handler *
@@ -1626,7 +1693,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
16261693
mutex_lock(&dev->flow_db->lock);
16271694

16281695
ft_prio = _get_flow_table(dev, fs_matcher->priority,
1629-
fs_matcher->ns_type, mcast);
1696+
fs_matcher->ns_type, mcast,
1697+
fs_matcher->ib_port);
16301698
if (IS_ERR(ft_prio)) {
16311699
err = PTR_ERR(ft_prio);
16321700
goto unlock;
@@ -1742,6 +1810,12 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
17421810
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
17431811
*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
17441812
break;
1813+
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
1814+
*namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
1815+
break;
1816+
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
1817+
*namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
1818+
break;
17451819
default:
17461820
return -EINVAL;
17471821
}
@@ -1831,7 +1905,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
18311905
return -EINVAL;
18321906

18331907
/* Allow only DEVX object or QP as dest when inserting to RDMA_RX or RDMA_TRANSPORT_RX */
1834-
if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1908+
if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
1909+
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
18351910
((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
18361911
return -EINVAL;
18371912

@@ -1848,7 +1923,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
18481923
return -EINVAL;
18491924
/* Allow only flow table as dest when inserting to FDB, RDMA_RX or RDMA_TRANSPORT_RX */
18501925
if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
1851-
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1926+
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
1927+
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
18521928
*dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
18531929
return -EINVAL;
18541930
} else if (dest_qp) {
@@ -1869,14 +1945,16 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
18691945
*dest_id = mqp->raw_packet_qp.rq.tirn;
18701946
*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
18711947
} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1872-
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
1948+
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
1949+
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
18731950
!(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
18741951
*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
18751952
}
18761953

18771954
if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
18781955
(fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1879-
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
1956+
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
1957+
fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
18801958
return -EINVAL;
18811959

18821960
return 0;
@@ -2353,6 +2431,15 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
23532431
return 0;
23542432
}
23552433

2434+
/*
 * Tell whether the capabilities enabled on a user context are sufficient
 * for the device's current mode.
 *
 * In switchdev mode RDMA_UCAP_MLX5_CTRL_OTHER_VHCA must be enabled in
 * @enabled_caps; otherwise RDMA_UCAP_MLX5_CTRL_LOCAL must be enabled.
 *
 * Return: true if the required capability bit is set, false otherwise.
 */
static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps)
{
	const bool switchdev = is_mdev_switchdev_mode(dev->mdev);

	if (!switchdev)
		return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL);

	return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
}
2442+
23562443
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
23572444
struct uverbs_attr_bundle *attrs)
23582445
{
@@ -2401,6 +2488,26 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
24012488
goto end;
24022489
}
24032490

2491+
if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
2492+
err = uverbs_copy_from(&obj->ib_port, attrs,
2493+
MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
2494+
if (err)
2495+
goto end;
2496+
if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
2497+
err = -EINVAL;
2498+
goto end;
2499+
}
2500+
if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
2501+
obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
2502+
err = -EINVAL;
2503+
goto end;
2504+
}
2505+
if (!verify_context_caps(dev, uobj->context->enabled_caps)) {
2506+
err = -EOPNOTSUPP;
2507+
goto end;
2508+
}
2509+
}
2510+
24042511
uobj->object = obj;
24052512
obj->mdev = dev->mdev;
24062513
atomic_set(&obj->usecnt, 0);
@@ -2448,7 +2555,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
24482555

24492556
mutex_lock(&dev->flow_db->lock);
24502557

2451-
ft_prio = _get_flow_table(dev, priority, ns_type, 0);
2558+
ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
24522559
if (IS_ERR(ft_prio)) {
24532560
err = PTR_ERR(ft_prio);
24542561
goto free_obj;
@@ -2834,7 +2941,10 @@ DECLARE_UVERBS_NAMED_METHOD(
28342941
UA_OPTIONAL),
28352942
UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
28362943
enum mlx5_ib_uapi_flow_table_type,
2837-
UA_OPTIONAL));
2944+
UA_OPTIONAL),
2945+
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
2946+
UVERBS_ATTR_TYPE(u32),
2947+
UA_OPTIONAL));
28382948

28392949
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
28402950
MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -2904,8 +3014,26 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
29043014
if (!dev->flow_db)
29053015
return -ENOMEM;
29063016

3017+
dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
3018+
sizeof(struct mlx5_ib_flow_prio),
3019+
GFP_KERNEL);
3020+
if (!dev->flow_db->rdma_transport_rx)
3021+
goto free_flow_db;
3022+
3023+
dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
3024+
sizeof(struct mlx5_ib_flow_prio),
3025+
GFP_KERNEL);
3026+
if (!dev->flow_db->rdma_transport_tx)
3027+
goto free_rdma_transport_rx;
3028+
29073029
mutex_init(&dev->flow_db->lock);
29083030

29093031
ib_set_device_ops(&dev->ib_dev, &flow_ops);
29103032
return 0;
3033+
3034+
free_rdma_transport_rx:
3035+
kfree(dev->flow_db->rdma_transport_rx);
3036+
free_flow_db:
3037+
kfree(dev->flow_db);
3038+
return -ENOMEM;
29113039
}

drivers/infiniband/hw/mlx5/fs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
4040
* is a safe assumption that all references are gone.
4141
*/
4242
mlx5_ib_fs_cleanup_anchor(dev);
43+
kfree(dev->flow_db->rdma_transport_tx);
44+
kfree(dev->flow_db->rdma_transport_rx);
4345
kfree(dev->flow_db);
4446
}
4547
#endif /* _MLX5_IB_FS_H */

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ struct mlx5_ib_flow_matcher {
276276
struct mlx5_core_dev *mdev;
277277
atomic_t usecnt;
278278
u8 match_criteria_enable;
279+
u32 ib_port;
279280
};
280281

281282
struct mlx5_ib_steering_anchor {
@@ -307,6 +308,8 @@ struct mlx5_ib_flow_db {
307308
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
308309
struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
309310
struct mlx5_flow_table *lag_demux_ft;
311+
struct mlx5_ib_flow_prio *rdma_transport_rx;
312+
struct mlx5_ib_flow_prio *rdma_transport_tx;
310313
/* Protect flow steering bypass flow tables
311314
* when add/del flow rules.
312315
* only single add/removal of flow steering rule could be done

include/uapi/rdma/mlx5_user_ioctl_cmds.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ enum mlx5_ib_flow_matcher_create_attrs {
239239
MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
240240
MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
241241
MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
242+
MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
242243
};
243244

244245
enum mlx5_ib_flow_matcher_destroy_attrs {

include/uapi/rdma/mlx5_user_ioctl_verbs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ enum mlx5_ib_uapi_flow_table_type {
4545
MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB = 0x2,
4646
MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX = 0x3,
4747
MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX = 0x4,
48+
MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX = 0x5,
49+
MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX = 0x6,
4850
};
4951

5052
enum mlx5_ib_uapi_flow_action_packet_reformat_type {

0 commit comments

Comments
 (0)