Skip to content

Commit 4237a9b

Browse files
committed
Merge: RAS: introduce MI300 persistence of poison data
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/4164 JIRA: https://issues.redhat.com/browse/RHEL-17008 Depends: !3899 Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> Approved-by: David Arcari <darcari@redhat.com> Approved-by: Lenny Szubowicz <lszubowi@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Lucas Zampieri <lzampier@redhat.com>
2 parents 7928a42 + 5d0a9bc commit 4237a9b

File tree

34 files changed

+1725
-48
lines changed

34 files changed

+1725
-48
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
3+
Address translation
4+
===================
5+
6+
x86 AMD
7+
-------
8+
9+
Zen-based AMD systems include a Data Fabric that manages the layout of
10+
physical memory. Devices attached to the Fabric, like memory controllers,
11+
I/O, etc., may not have a complete view of the system physical memory map.
12+
These devices may provide a "normalized", i.e. device physical, address
13+
when reporting memory errors. Normalized addresses must be translated to
14+
a system physical address so that the kernel can take action on the memory.
15+
16+
AMD Address Translation Library (CONFIG_AMD_ATL) provides translation for
17+
this case.
18+
19+
Glossary of acronyms used in address translation for Zen-based systems
20+
21+
* CCM = Cache Coherent Moderator
22+
* COD = Cluster-on-Die
23+
* COH_ST = Coherent Station
24+
* DF = Data Fabric
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
.. toctree::
3+
:maxdepth: 2
4+
5+
main
6+
error-decoding
7+
address-translation

Documentation/admin-guide/ras.rst renamed to Documentation/admin-guide/RAS/main.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1+
.. SPDX-License-Identifier: GPL-2.0
12
.. include:: <isonum.txt>
23

3-
============================================
4-
Reliability, Availability and Serviceability
5-
============================================
4+
==================================================
5+
Reliability, Availability and Serviceability (RAS)
6+
==================================================
7+
8+
This documents different aspects of the RAS functionality present in the
9+
kernel.
610

711
RAS concepts
812
************

Documentation/admin-guide/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ configure specific aspects of kernel behavior to your liking.
108108
pm/index
109109
pnp
110110
rapidio
111-
ras
111+
RAS/index
112112
rtc
113113
serial-console
114114
svga

MAINTAINERS

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6792,7 +6792,6 @@ R: Robert Richter <rric@kernel.org>
67926792
L: linux-edac@vger.kernel.org
67936793
S: Supported
67946794
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next
6795-
F: Documentation/admin-guide/ras.rst
67966795
F: Documentation/driver-api/edac.rst
67976796
F: drivers/edac/
67986797
F: include/linux/edac.h
@@ -16248,11 +16247,18 @@ M: Tony Luck <tony.luck@intel.com>
1624816247
M: Borislav Petkov <bp@alien8.de>
1624916248
L: linux-edac@vger.kernel.org
1625016249
S: Maintained
16251-
F: Documentation/admin-guide/ras.rst
16250+
F: Documentation/RAS/
16251+
F: Documentation/admin-guide/RAS/
1625216252
F: drivers/ras/
1625316253
F: include/linux/ras.h
1625416254
F: include/ras/ras_event.h
1625516255

16256+
RAS FRU MEMORY POISON MANAGER (FMPM)
16257+
M: Yazen Ghannam <Yazen.Ghannam@amd.com>
16258+
L: linux-edac@vger.kernel.org
16259+
S: Maintained
16260+
F: drivers/ras/amd/fmpm.c
16261+
1625616262
RC-CORE / LIRC FRAMEWORK
1625716263
M: Sean Young <sean@mess.org>
1625816264
L: linux-media@vger.kernel.org

arch/x86/events/amd/core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ static void amd_pmu_cpu_starting(int cpu)
579579
if (!x86_pmu.amd_nb_constraints)
580580
return;
581581

582-
nb_id = topology_die_id(cpu);
582+
nb_id = topology_amd_node_id(cpu);
583583
WARN_ON_ONCE(nb_id == BAD_APICID);
584584

585585
for_each_online_cpu(i) {

arch/x86/include/asm/processor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ struct cpuinfo_topology {
104104
u32 logical_pkg_id;
105105
u32 logical_die_id;
106106

107+
// AMD Node ID and Nodes per Package info
108+
u32 amd_node_id;
109+
107110
// Cache level topology IDs
108111
u32 llc_id;
109112
u32 l2c_id;

arch/x86/include/asm/topology.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
112112
#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id)
113113
#define topology_ppin(cpu) (cpu_data(cpu).ppin)
114114

115+
#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.die_id)
116+
115117
extern unsigned int __max_die_per_package;
116118

117119
#ifdef CONFIG_SMP
@@ -147,6 +149,11 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu);
147149
int topology_update_die_map(unsigned int dieid, unsigned int cpu);
148150
int topology_phys_to_logical_pkg(unsigned int pkg);
149151

152+
static inline unsigned int topology_amd_nodes_per_pkg(void)
153+
{
154+
return __max_die_per_package;
155+
}
156+
150157
extern struct cpumask __cpu_primary_thread_mask;
151158
#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
152159

@@ -159,15 +166,10 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
159166
return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
160167
}
161168
#else /* CONFIG_SMP */
162-
#define topology_max_packages() (1)
163-
static inline int
164-
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
165-
static inline int
166-
topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
167169
/*
 * !CONFIG_SMP stub: every physical package number is reported as
 * logical package 0; the @pkg argument is ignored.
 */
static inline int topology_phys_to_logical_pkg(unsigned int pkg)
{
	return 0;
}
168-
static inline int topology_max_die_per_package(void) { return 1; }
169170
/* !CONFIG_SMP stub: always reports a single SMT thread. */
static inline int topology_max_smt_threads(void)
{
	return 1;
}
170171
/*
 * !CONFIG_SMP stub: any CPU number is reported as a primary thread;
 * the @cpu argument is ignored.
 */
static inline bool topology_is_primary_thread(unsigned int cpu)
{
	return true;
}
172+
/* !CONFIG_SMP stub: always reports one AMD node per package. */
static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
171173
#endif /* !CONFIG_SMP */
172174

173175
static inline void arch_fix_phys_package_id(int num, u32 slot)

arch/x86/kernel/amd_nb.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ struct resource *amd_get_mmconfig_range(struct resource *res)
386386

387387
int amd_get_subcaches(int cpu)
388388
{
389-
struct pci_dev *link = node_to_amd_nb(topology_die_id(cpu))->link;
389+
struct pci_dev *link = node_to_amd_nb(topology_amd_node_id(cpu))->link;
390390
unsigned int mask;
391391

392392
if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
@@ -400,7 +400,7 @@ int amd_get_subcaches(int cpu)
400400
int amd_set_subcaches(int cpu, unsigned long mask)
401401
{
402402
static unsigned int reset, ban;
403-
struct amd_northbridge *nb = node_to_amd_nb(topology_die_id(cpu));
403+
struct amd_northbridge *nb = node_to_amd_nb(topology_amd_node_id(cpu));
404404
unsigned int reg;
405405
int cuid;
406406

arch/x86/kernel/cpu/cacheinfo.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
595595
if (index < 3)
596596
return;
597597

598-
node = topology_die_id(smp_processor_id());
598+
node = topology_amd_node_id(smp_processor_id());
599599
this_leaf->nb = node_to_amd_nb(node);
600600
if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
601601
amd_calc_l3_indices(this_leaf->nb);

0 commit comments

Comments
 (0)