Skip to content

Commit c4bec8f

Browse files
author
Frantisek Hrbata
committed
Merge: skx_common: use driver decoder when possible
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/1466
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124913
Tested: by me, sanity

This patchset backports support for decoding errors in the driver when possible instead of using BIOS, which improves performance. As production systems might see storms of thousands of corrected errors per second, this can be very useful. The code will fall back to BIOS when driver decoding is not possible.

Signed-off-by: Aristeu Rozanski <arozansk@redhat.com>
Approved-by: David Arcari <darcari@redhat.com>
Approved-by: Prarit Bhargava <prarit@redhat.com>
Approved-by: Rafael Aquini <aquini@redhat.com>
Approved-by: Herton R. Krzesinski <herton@redhat.com>
Signed-off-by: Frantisek Hrbata <fhrbata@redhat.com>
2 parents 0fe6a54 + 35202c4 commit c4bec8f

File tree

3 files changed

+19
-11
lines changed

3 files changed

+19
-11
lines changed

drivers/edac/skx_base.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -714,8 +714,13 @@ static int __init skx_init(void)
714714

715715
skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
716716

717-
if (nvdimm_count && skx_adxl_get() == -ENODEV)
718-
skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
717+
if (nvdimm_count && skx_adxl_get() != -ENODEV) {
718+
skx_set_decode(NULL, skx_show_retry_rd_err_log);
719+
} else {
720+
if (nvdimm_count)
721+
skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
722+
skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
723+
}
719724

720725
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
721726
opstate_init();

drivers/edac/skx_common.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static char *adxl_msg;
4040
static unsigned long adxl_nm_bitmap;
4141

4242
static char skx_msg[MSG_SIZE];
43-
static skx_decode_f skx_decode;
43+
static skx_decode_f driver_decode;
4444
static skx_show_retry_log_f skx_show_retry_rd_err_log;
4545
static u64 skx_tolm, skx_tohm;
4646
static LIST_HEAD(dev_edac_list);
@@ -173,6 +173,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
173173
break;
174174
}
175175

176+
res->decoded_by_adxl = true;
177+
176178
return true;
177179
}
178180

@@ -183,7 +185,7 @@ void skx_set_mem_cfg(bool mem_cfg_2lm)
183185

184186
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
185187
{
186-
skx_decode = decode;
188+
driver_decode = decode;
187189
skx_show_retry_rd_err_log = show_retry_log;
188190
}
189191

@@ -588,19 +590,19 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
588590
break;
589591
}
590592
}
591-
if (adxl_component_count) {
593+
if (res->decoded_by_adxl) {
592594
len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
593595
overflow ? " OVERFLOW" : "",
594596
(uncorrected_error && recoverable) ? " recoverable" : "",
595597
mscod, errcode, adxl_msg);
596598
} else {
597599
len = snprintf(skx_msg, MSG_SIZE,
598-
"%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
600+
"%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x",
599601
overflow ? " OVERFLOW" : "",
600602
(uncorrected_error && recoverable) ? " recoverable" : "",
601603
mscod, errcode,
602604
res->socket, res->imc, res->rank,
603-
res->bank_group, res->bank_address, res->row, res->column);
605+
res->row, res->column, res->bank_address, res->bank_group);
604606
}
605607

606608
if (skx_show_retry_rd_err_log)
@@ -648,11 +650,11 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
648650
memset(&res, 0, sizeof(res));
649651
res.addr = mce->addr;
650652

651-
if (adxl_component_count) {
652-
if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
653+
/* Try driver decoder first */
654+
if (!(driver_decode && driver_decode(&res))) {
655+
/* Then try firmware decoder (ACPI DSM methods) */
656+
if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))))
653657
return NOTIFY_DONE;
654-
} else if (!skx_decode || !skx_decode(&res)) {
655-
return NOTIFY_DONE;
656658
}
657659

658660
mci = res.dev->imc[res.imc].mci;

drivers/edac/skx_common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ struct decoded_addr {
136136
int column;
137137
int bank_address;
138138
int bank_group;
139+
bool decoded_by_adxl;
139140
};
140141

141142
struct res_config {

0 commit comments

Comments
 (0)