Skip to content

Commit 45220ca

Browse files
committed
powerpc/pseries/iommu: memory notifier incorrectly adds TCEs for pmemory
jira LE-4694 Rebuild_History Non-Buildable kernel-6.12.0-55.43.1.el10_0 commit-author Gaurav Batra <gbatra@linux.ibm.com> commit 6aa989a iommu_mem_notifier() is invoked when RAM is dynamically added or removed. This notifier is responsible for adding/removing TCEs in the Dynamic DMA Window (DDW) when TCEs are pre-mapped. TCEs are pre-mapped only for RAM and not for persistent memory (pmemory). For DMA buffers in pmemory, TCEs are mapped dynamically when the device driver requests it. The issue is that the 'daxctl' command is capable of adding pmemory as "System RAM" after LPAR boot. The command to do so is `daxctl reconfigure-device --mode=system-ram dax0.0 --force`. This dynamically adds the pmemory range to LPAR RAM, eventually invoking iommu_mem_notifier(). The address range of pmemory is far beyond the maximum RAM that the LPAR can have, which means this range lies outside the DDW created for the device at device initialization time. As a result, when iommu_mem_notifier() pre-maps TCEs for the pmemory range, the PHYP HCALL returns H_PARAMETER, which causes the daxctl command to fail to add pmemory as RAM. The solution is to not pre-map TCEs for pmemory. Signed-off-by: Gaurav Batra <gbatra@linux.ibm.com> Tested-by: Donet Tom <donettom@linux.ibm.com> Reviewed-by: Donet Tom <donettom@linux.ibm.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/20250130183854.92258-1-gbatra@linux.ibm.com (cherry picked from commit 6aa989a) Signed-off-by: Jonathan Maple <jmaple@ciq.com>
1 parent f1ac171 commit 45220ca

File tree

3 files changed

+18
-14
lines changed

3 files changed

+18
-14
lines changed

arch/powerpc/include/asm/mmzone.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ extern cpumask_var_t node_to_cpumask_map[];
2929
#ifdef CONFIG_MEMORY_HOTPLUG
3030
extern unsigned long max_pfn;
3131
u64 memory_hotplug_max(void);
32+
u64 hot_add_drconf_memory_max(void);
3233
#else
3334
#define memory_hotplug_max() memblock_end_of_DRAM()
3435
#endif

arch/powerpc/mm/numa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1336,7 +1336,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
13361336
return nid;
13371337
}
13381338

1339-
static u64 hot_add_drconf_memory_max(void)
1339+
u64 hot_add_drconf_memory_max(void)
13401340
{
13411341
struct device_node *memory = NULL;
13421342
struct device_node *dn = NULL;

arch/powerpc/platforms/pseries/iommu.c

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,17 +1293,13 @@ static LIST_HEAD(failed_ddw_pdn_list);
12931293

12941294
static phys_addr_t ddw_memory_hotplug_max(void)
12951295
{
1296-
resource_size_t max_addr = memory_hotplug_max();
1297-
struct device_node *memory;
1296+
resource_size_t max_addr;
12981297

1299-
for_each_node_by_type(memory, "memory") {
1300-
struct resource res;
1301-
1302-
if (of_address_to_resource(memory, 0, &res))
1303-
continue;
1304-
1305-
max_addr = max_t(resource_size_t, max_addr, res.end + 1);
1306-
}
1298+
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1299+
max_addr = hot_add_drconf_memory_max();
1300+
#else
1301+
max_addr = memblock_end_of_DRAM();
1302+
#endif
13071303

13081304
return max_addr;
13091305
}
@@ -1609,7 +1605,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
16091605

16101606
if (direct_mapping) {
16111607
/* DDW maps the whole partition, so enable direct DMA mapping */
1612-
ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
1608+
ret = walk_system_ram_range(0, ddw_memory_hotplug_max() >> PAGE_SHIFT,
16131609
win64->value, tce_setrange_multi_pSeriesLP_walk);
16141610
if (ret) {
16151611
dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
@@ -2355,11 +2351,17 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
23552351
struct memory_notify *arg = data;
23562352
int ret = 0;
23572353

2354+
/* This notifier can get called when onlining persistent memory as well.
2355+
* TCEs are not pre-mapped for persistent memory. Persistent memory will
2356+
* always be above ddw_memory_hotplug_max()
2357+
*/
2358+
23582359
switch (action) {
23592360
case MEM_GOING_ONLINE:
23602361
spin_lock(&dma_win_list_lock);
23612362
list_for_each_entry(window, &dma_win_list, list) {
2362-
if (window->direct) {
2363+
if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
2364+
ddw_memory_hotplug_max()) {
23632365
ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
23642366
arg->nr_pages, window->prop);
23652367
}
@@ -2371,7 +2373,8 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
23712373
case MEM_OFFLINE:
23722374
spin_lock(&dma_win_list_lock);
23732375
list_for_each_entry(window, &dma_win_list, list) {
2374-
if (window->direct) {
2376+
if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
2377+
ddw_memory_hotplug_max()) {
23752378
ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
23762379
arg->nr_pages, window->prop);
23772380
}

0 commit comments

Comments
 (0)