Skip to content

Commit acd0554

Browse files
author
Benjamin Poirier
committed
RDMA/mlx5: Add optional counters for RDMA_TX/RX_packets/bytes
JIRA: https://issues.redhat.com/browse/RHEL-72227 JIRA: https://issues.redhat.com/browse/RHEL-73520 Upstream-status: v6.15-rc1 commit d375db4 Author: Patrisious Haddad <phaddad@nvidia.com> Date: Thu Mar 13 16:18:41 2025 +0200 RDMA/mlx5: Add optional counters for RDMA_TX/RX_packets/bytes Add the following optional counters: rdma_tx_packets, rdma_rx_bytes, rdma_rx_packets, rdma_tx_bytes, which count all RDMA packets/bytes sent and received per link. Note that since the packet and byte counters of each direction are shared, the counter is only reset when both counters of that direction are removed. From the user's perspective, however, each can be enabled/disabled separately. The counters can be enabled using: sudo rdma stat set link rocep8s0f0/1 optional-counters rdma_tx_packets And can be seen using: rdma stat -j show link rocep8s0f0/1 Signed-off-by: Patrisious Haddad <phaddad@nvidia.com> Reviewed-by: Mark Bloch <mbloch@nvidia.com> Link: https://patch.msgid.link/9f2753ad636f21704416df64b47395c8991d1123.1741875070.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Benjamin Poirier <bpoirier@redhat.com>
1 parent 6cc04ea commit acd0554

File tree

4 files changed

+133
-7
lines changed

4 files changed

+133
-7
lines changed

drivers/infiniband/hw/mlx5/counters.c

Lines changed: 83 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,13 @@ static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
140140
INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
141141
};
142142

143+
/*
 * Optional counters for RDMA packets and bytes, one entry per direction
 * (TX/RX) and per unit (packets/bytes). The .name and .type fields of each
 * entry are what mlx5_ib_fill_counters() copies into the stat descriptors.
 * NOTE(review): INIT_OP_COUNTER is defined outside this view; presumably it
 * fills .name from the first argument and .type from
 * MLX5_IB_OPCOUNTER_<second argument> -- confirm against its definition.
 */
static const struct mlx5_ib_counter packets_op_cnts[] = {
144+
INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS),
145+
INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES),
146+
INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS),
147+
INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES),
148+
};
149+
143150
static int mlx5_ib_read_counters(struct ib_counters *counters,
144151
struct ib_counters_read_attr *read_attr,
145152
struct uverbs_attr_bundle *attrs)
@@ -427,14 +434,23 @@ static int do_get_hw_stats(struct ib_device *ibdev,
427434
return num_counters;
428435
}
429436

437+
/*
 * Tell whether an optional counter type is one of the RDMA byte counters
 * (TX or RX bytes). Any other type yields false.
 */
static bool is_rdma_bytes_counter(u32 type)
{
	switch (type) {
	case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
	case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
		return true;
	default:
		return false;
	}
}
445+
430446
static int do_get_op_stat(struct ib_device *ibdev,
431447
struct rdma_hw_stats *stats,
432448
u32 port_num, int index)
433449
{
434450
struct mlx5_ib_dev *dev = to_mdev(ibdev);
435451
const struct mlx5_ib_counters *cnts;
436452
const struct mlx5_ib_op_fc *opfcs;
437-
u64 packets = 0, bytes;
453+
u64 packets, bytes;
438454
u32 type;
439455
int ret;
440456

@@ -453,8 +469,11 @@ static int do_get_op_stat(struct ib_device *ibdev,
453469
if (ret)
454470
return ret;
455471

472+
if (is_rdma_bytes_counter(type))
473+
stats->value[index] = bytes;
474+
else
475+
stats->value[index] = packets;
456476
out:
457-
stats->value[index] = packets;
458477
return index;
459478
}
460479

@@ -681,6 +700,12 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
681700
descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
682701
}
683702
}
703+
704+
for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) {
705+
descs[j].name = packets_op_cnts[i].name;
706+
descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
707+
descs[j].priv = &packets_op_cnts[i].type;
708+
}
684709
}
685710

686711

@@ -731,6 +756,8 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
731756

732757
num_op_counters = ARRAY_SIZE(basic_op_cnts);
733758

759+
num_op_counters += ARRAY_SIZE(packets_op_cnts);
760+
734761
if (MLX5_CAP_FLOWTABLE(dev->mdev,
735762
ft_field_support_2_nic_receive_rdma.bth_opcode))
736763
num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
@@ -760,10 +787,47 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
760787
return -ENOMEM;
761788
}
762789

790+
/*
791+
* Checks if the given flow counter type should be sharing the same flow counter
792+
* with another type and if it should, checks if that other type flow counter
793+
* was already created, if both conditions are met return true and the counter
794+
* else return false.
795+
*/
796+
static bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs,
797+
u32 type,
798+
struct mlx5_ib_op_fc **opfc)
799+
{
800+
u32 shared_fc_type;
801+
802+
switch (type) {
803+
case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
804+
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES;
805+
break;
806+
case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
807+
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
808+
break;
809+
case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
810+
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES;
811+
break;
812+
case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
813+
shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
814+
break;
815+
default:
816+
return false;
817+
}
818+
819+
*opfc = &opfcs[shared_fc_type];
820+
if (!(*opfc)->fc)
821+
return false;
822+
823+
return true;
824+
}
825+
763826
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
764827
{
765828
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
766829
int num_cnt_ports = dev->num_ports;
830+
struct mlx5_ib_op_fc *in_use_opfc;
767831
int i, j;
768832

769833
if (is_mdev_switchdev_mode(dev->mdev))
@@ -785,11 +849,16 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
785849
if (!dev->port[i].cnts.opfcs[j].fc)
786850
continue;
787851

852+
if (mlx5r_is_opfc_shared_and_in_use(
853+
dev->port[i].cnts.opfcs, j, &in_use_opfc))
854+
goto skip;
855+
788856
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
789857
mlx5_ib_fs_remove_op_fc(dev,
790858
&dev->port[i].cnts.opfcs[j], j);
791859
mlx5_fc_destroy(dev->mdev,
792860
dev->port[i].cnts.opfcs[j].fc);
861+
skip:
793862
dev->port[i].cnts.opfcs[j].fc = NULL;
794863
}
795864
}
@@ -983,8 +1052,8 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
9831052
unsigned int index, bool enable)
9841053
{
9851054
struct mlx5_ib_dev *dev = to_mdev(device);
1055+
struct mlx5_ib_op_fc *opfc, *in_use_opfc;
9861056
struct mlx5_ib_counters *cnts;
987-
struct mlx5_ib_op_fc *opfc;
9881057
u32 num_hw_counters, type;
9891058
int ret;
9901059

@@ -1008,6 +1077,13 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
10081077
if (opfc->fc)
10091078
return -EEXIST;
10101079

1080+
if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type,
1081+
&in_use_opfc)) {
1082+
opfc->fc = in_use_opfc->fc;
1083+
opfc->rule[0] = in_use_opfc->rule[0];
1084+
return 0;
1085+
}
1086+
10111087
opfc->fc = mlx5_fc_create(dev->mdev, false);
10121088
if (IS_ERR(opfc->fc))
10131089
return PTR_ERR(opfc->fc);
@@ -1023,8 +1099,12 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
10231099
if (!opfc->fc)
10241100
return -EINVAL;
10251101

1102+
if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc))
1103+
goto out;
1104+
10261105
mlx5_ib_fs_remove_op_fc(dev, opfc, type);
10271106
mlx5_fc_destroy(dev->mdev, opfc->fc);
1107+
out:
10281108
opfc->fc = NULL;
10291109
return 0;
10301110
}

drivers/infiniband/hw/mlx5/fs.c

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -802,10 +802,12 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
802802
/*
 * Priority values for optional-counter flow tables on the RDMA RX side.
 * RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO is the priority mlx5_ib_fs_add_op_fc()
 * selects for the RDMA RX packets/bytes counter types.
 * NOTE(review): the users of the ECN and CNP entries are outside this view.
 */
enum {
803803
RDMA_RX_ECN_OPCOUNTER_PRIO,
804804
RDMA_RX_CNP_OPCOUNTER_PRIO,
805+
RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO,
805806
};
806807

807808
/*
 * Priority values for optional-counter flow tables on the RDMA TX side.
 * mlx5_ib_fs_add_op_fc() selects RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO for the
 * RDMA TX packets/bytes counter types.
 */
enum {
808809
RDMA_TX_CNP_OPCOUNTER_PRIO,
810+
RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO,
809811
};
810812

811813
static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
@@ -869,6 +871,29 @@ static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
869871
return 0;
870872
}
871873

874+
/* Returns the prio we should use for the given optional counter type,
875+
* whereas for bytes type we use the packet type, since they share the same
876+
* resources.
877+
*/
878+
static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev,
879+
u32 type)
880+
{
881+
u32 prio_type;
882+
883+
switch (type) {
884+
case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
885+
prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
886+
break;
887+
case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
888+
prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
889+
break;
890+
default:
891+
prio_type = type;
892+
}
893+
894+
return &dev->flow_db->opfcs[prio_type];
895+
}
896+
872897
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
873898
struct mlx5_ib_op_fc *opfc,
874899
enum mlx5_ib_optional_counter_type type)
@@ -923,6 +948,20 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
923948
priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
924949
break;
925950

951+
case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
952+
case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
953+
spec_num = 1;
954+
fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
955+
priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO;
956+
break;
957+
958+
case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
959+
case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
960+
spec_num = 1;
961+
fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
962+
priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO;
963+
break;
964+
926965
default:
927966
err = -EOPNOTSUPP;
928967
goto free;
@@ -934,7 +973,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
934973
goto free;
935974
}
936975

937-
prio = &dev->flow_db->opfcs[type];
976+
prio = get_opfc_prio(dev, type);
938977
if (!prio->flow_table) {
939978
prio = _get_prio(dev, ns, prio, priority,
940979
dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
@@ -976,11 +1015,14 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
9761015
struct mlx5_ib_op_fc *opfc,
9771016
enum mlx5_ib_optional_counter_type type)
9781017
{
1018+
struct mlx5_ib_flow_prio *prio;
9791019
int i;
9801020

1021+
prio = get_opfc_prio(dev, type);
1022+
9811023
for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
9821024
mlx5_del_flow_rules(opfc->rule[i]);
983-
put_flow_table(dev, &dev->flow_db->opfcs[type], true);
1025+
put_flow_table(dev, prio, true);
9841026
}
9851027
}
9861028

drivers/infiniband/hw/mlx5/mlx5_ib.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,10 @@ enum mlx5_ib_optional_counter_type {
294294
MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
295295
MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
296296
MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
297+
MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS,
298+
MLX5_IB_OPCOUNTER_RDMA_TX_BYTES,
299+
MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS,
300+
MLX5_IB_OPCOUNTER_RDMA_RX_BYTES,
297301

298302
MLX5_IB_OPCOUNTER_MAX,
299303
};

include/linux/mlx5/device.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,8 +1533,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
15331533
return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
15341534
}
15351535

1536-
#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 2
1537-
#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 1
1536+
#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 3
1537+
#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 2
15381538
#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16
15391539
#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16
15401540
#define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1

0 commit comments

Comments
 (0)