Skip to content

Commit 8fd2866

Browse files
committed
Merge: i40e: add ability to reset VF for Tx and Rx MDD events
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/130 JIRA: https://issues.redhat.com/browse/RHEL-73034 Signed-off-by: Michal Schmidt <mschmidt@redhat.com> Approved-by: Kamal Heib <kheib@redhat.com> Approved-by: José Ignacio Tornos Martínez <jtornosm@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Jan Stancek <jstancek@redhat.com>
2 parents 35bb9a8 + 0b896f2 commit 8fd2866

File tree

7 files changed

+123
-17
lines changed

7 files changed

+123
-17
lines changed

Documentation/networking/device_drivers/ethernet/intel/i40e.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,18 @@ Use ethtool to view and set link-down-on-close, as follows::
299299
ethtool --show-priv-flags ethX
300300
ethtool --set-priv-flags ethX link-down-on-close [on|off]
301301

302+
Setting the mdd-auto-reset-vf Private Flag
303+
------------------------------------------
304+
305+
When the mdd-auto-reset-vf private flag is set to "on", the problematic VF will
306+
be automatically reset if a malformed descriptor is detected. If the flag is
307+
set to "off", the problematic VF will be disabled until re-enabled via the PF.
308+
309+
Use ethtool to view and set mdd-auto-reset-vf, as follows::
310+
311+
ethtool --show-priv-flags ethX
312+
ethtool --set-priv-flags ethX mdd-auto-reset-vf [on|off]
313+
302314
Viewing Link Messages
303315
---------------------
304316
Link messages will not be displayed to the console if the distribution is

drivers/net/ethernet/intel/i40e/i40e.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ enum i40e_state {
8888
__I40E_SERVICE_SCHED,
8989
__I40E_ADMINQ_EVENT_PENDING,
9090
__I40E_MDD_EVENT_PENDING,
91+
__I40E_MDD_VF_PRINT_PENDING,
9192
__I40E_VFLR_EVENT_PENDING,
9293
__I40E_RESET_RECOVERY_PENDING,
9394
__I40E_TIMEOUT_RECOVERY_PENDING,
@@ -191,6 +192,7 @@ enum i40e_pf_flags {
191192
*/
192193
I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
193194
I40E_FLAG_VF_VLAN_PRUNING_ENA,
195+
I40E_FLAG_MDD_AUTO_RESET_VF,
194196
I40E_PF_FLAGS_NBITS, /* must be last */
195197
};
196198

@@ -572,7 +574,7 @@ struct i40e_pf {
572574
int num_alloc_vfs; /* actual number of VFs allocated */
573575
u32 vf_aq_requests;
574576
u32 arq_overflows; /* Not fatal, possibly indicative of problems */
575-
577+
struct ratelimit_state mdd_message_rate_limit;
576578
/* DCBx/DCBNL capability for PF that indicates
577579
* whether DCBx is managed by firmware or host
578580
* based agent (LLDPAD). Also, indicates what

drivers/net/ethernet/intel/i40e/i40e_debugfs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
722722
dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
723723
vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
724724
dev_info(&pf->pdev->dev, " num MDD=%lld\n",
725-
vf->num_mdd_events);
725+
vf->mdd_tx_events.count + vf->mdd_rx_events.count);
726726
} else {
727727
dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
728728
}

drivers/net/ethernet/intel/i40e/i40e_ethtool.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
459459
I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
460460
I40E_PRIV_FLAG("vf-vlan-pruning",
461461
I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
462+
I40E_PRIV_FLAG("mdd-auto-reset-vf",
463+
I40E_FLAG_MDD_AUTO_RESET_VF, 0),
462464
};
463465

464466
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)

drivers/net/ethernet/intel/i40e/i40e_main.c

Lines changed: 94 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11216,6 +11216,67 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
1121611216
i40e_reset_and_rebuild(pf, false, lock_acquired);
1121711217
}
1121811218

11219+
/**
11220+
* i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
11221+
* @pf: board private structure
11222+
* @vf: pointer to the VF structure
11223+
* @is_tx: true - for Tx event, false - for Rx
11224+
*/
11225+
static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
11226+
bool is_tx)
11227+
{
11228+
dev_err(&pf->pdev->dev, is_tx ?
11229+
"%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
11230+
"%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
11231+
is_tx ? vf->mdd_tx_events.count : vf->mdd_rx_events.count,
11232+
pf->hw.pf_id,
11233+
vf->vf_id,
11234+
vf->default_lan_addr.addr,
11235+
str_on_off(test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)));
11236+
}
11237+
11238+
/**
11239+
* i40e_print_vfs_mdd_events - print VFs malicious driver detect event
11240+
* @pf: pointer to the PF structure
11241+
*
11242+
* Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
11243+
*/
11244+
static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
11245+
{
11246+
unsigned int i;
11247+
11248+
/* check that there are pending MDD events to print */
11249+
if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
11250+
return;
11251+
11252+
if (!__ratelimit(&pf->mdd_message_rate_limit))
11253+
return;
11254+
11255+
for (i = 0; i < pf->num_alloc_vfs; i++) {
11256+
struct i40e_vf *vf = &pf->vf[i];
11257+
bool is_printed = false;
11258+
11259+
/* only print Rx MDD event message if there are new events */
11260+
if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
11261+
vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
11262+
i40e_print_vf_mdd_event(pf, vf, false);
11263+
is_printed = true;
11264+
}
11265+
11266+
/* only print Tx MDD event message if there are new events */
11267+
if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
11268+
vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
11269+
i40e_print_vf_mdd_event(pf, vf, true);
11270+
is_printed = true;
11271+
}
11272+
11273+
if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
11274+
dev_info(&pf->pdev->dev,
11275+
"Use PF Control I/F to re-enable the VF #%d\n",
11276+
i);
11277+
}
11278+
}
11279+
1121911280
/**
1122011281
* i40e_handle_mdd_event
1122111282
* @pf: pointer to the PF structure
@@ -11230,8 +11291,13 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
1123011291
u32 reg;
1123111292
int i;
1123211293

11233-
if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
11294+
if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
11295+
/* Since the VF MDD event logging is rate limited, check if
11296+
* there are pending MDD events.
11297+
*/
11298+
i40e_print_vfs_mdd_events(pf);
1123411299
return;
11300+
}
1123511301

1123611302
/* find what triggered the MDD event */
1123711303
reg = rd32(hw, I40E_GL_MDET_TX);
@@ -11275,36 +11341,48 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
1127511341

1127611342
/* see if one of the VFs needs its hand slapped */
1127711343
for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
11344+
bool is_mdd_on_tx = false;
11345+
bool is_mdd_on_rx = false;
11346+
1127811347
vf = &(pf->vf[i]);
1127911348
reg = rd32(hw, I40E_VP_MDET_TX(i));
1128011349
if (reg & I40E_VP_MDET_TX_VALID_MASK) {
11350+
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
1128111351
wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
11282-
vf->num_mdd_events++;
11283-
dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
11284-
i);
11285-
dev_info(&pf->pdev->dev,
11286-
"Use PF Control I/F to re-enable the VF\n");
11352+
vf->mdd_tx_events.count++;
1128711353
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
11354+
is_mdd_on_tx = true;
1128811355
}
1128911356

1129011357
reg = rd32(hw, I40E_VP_MDET_RX(i));
1129111358
if (reg & I40E_VP_MDET_RX_VALID_MASK) {
11359+
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
1129211360
wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
11293-
vf->num_mdd_events++;
11294-
dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
11295-
i);
11296-
dev_info(&pf->pdev->dev,
11297-
"Use PF Control I/F to re-enable the VF\n");
11361+
vf->mdd_rx_events.count++;
1129811362
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
11363+
is_mdd_on_rx = true;
11364+
}
11365+
11366+
if ((is_mdd_on_tx || is_mdd_on_rx) &&
11367+
test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
11368+
/* VF MDD event counters will be cleared by
11369+
* reset, so print the event prior to reset.
11370+
*/
11371+
if (is_mdd_on_rx)
11372+
i40e_print_vf_mdd_event(pf, vf, false);
11373+
if (is_mdd_on_tx)
11374+
i40e_print_vf_mdd_event(pf, vf, true);
11375+
11376+
i40e_vc_reset_vf(vf, true);
1129911377
}
1130011378
}
1130111379

11302-
/* re-enable mdd interrupt cause */
11303-
clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
1130411380
reg = rd32(hw, I40E_PFINT_ICR0_ENA);
1130511381
reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
1130611382
wr32(hw, I40E_PFINT_ICR0_ENA, reg);
1130711383
i40e_flush(hw);
11384+
11385+
i40e_print_vfs_mdd_events(pf);
1130811386
}
1130911387

1131011388
/**
@@ -15997,6 +16075,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1599716075
ERR_PTR(err),
1599816076
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
1599916077

16078+
/* VF MDD event logs are rate limited to one second intervals */
16079+
ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);
16080+
1600016081
/* Reconfigure hardware for allowing smaller MSS in the case
1600116082
* of TSO, so that we avoid the MDD being fired and causing
1600216083
* a reset in the case of small MSS+TSO.

drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
216216
* @notify_vf: notify vf about reset or not
217217
* Reset VF handler.
218218
**/
219-
static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
219+
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
220220
{
221221
struct i40e_pf *pf = vf->pf;
222222
int i;

drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ struct i40evf_channel {
6464
u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
6565
};
6666

67+
struct i40e_mdd_vf_events {
68+
u64 count; /* total count of Rx|Tx events */
69+
/* count number of the last printed event */
70+
u64 last_printed;
71+
};
72+
6773
/* VF information structure */
6874
struct i40e_vf {
6975
struct i40e_pf *pf;
@@ -92,7 +98,9 @@ struct i40e_vf {
9298

9399
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
94100
u8 num_req_queues; /* num of requested qps */
95-
u64 num_mdd_events; /* num of mdd events detected */
101+
/* num of mdd tx and rx events detected */
102+
struct i40e_mdd_vf_events mdd_rx_events;
103+
struct i40e_mdd_vf_events mdd_tx_events;
96104

97105
unsigned long vf_caps; /* vf's adv. capabilities */
98106
unsigned long vf_states; /* vf's runtime states */
@@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
120128
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
121129
u32 v_retval, u8 *msg, u16 msglen);
122130
int i40e_vc_process_vflr_event(struct i40e_pf *pf);
131+
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
123132
bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
124133
bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
125134
void i40e_vc_notify_vf_reset(struct i40e_vf *vf);

0 commit comments

Comments
 (0)