Skip to content

Commit b71e05a

Browse files
committed
Merge: Updates for iommu TCEs for pmemory
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6830 Description: Updates for iommu TCEs for pmemory JIRA: https://issues.redhat.com/browse/RHEL-88421 Build Info: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=67511443 Tested: Verified Brew build test kernel RPMs Signed-off-by: Mamatha Inamdar <minamdar@redhat.com> commit 6aa989a Author: Gaurav Batra <gbatra@linux.ibm.com> Date: Thu Jan 30 12:38:54 2025 -0600 powerpc/pseries/iommu: memory notifier incorrectly adds TCEs for pmemory iommu_mem_notifier() is invoked when RAM is dynamically added/removed. This notifier call is responsible for adding/removing TCEs from the Dynamic DMA Window (DDW) when TCEs are pre-mapped. TCEs are pre-mapped only for RAM and not for persistent memory (pmemory). For DMA buffers in pmemory, TCEs are dynamically mapped when the device driver requests it. The issue is that the 'daxctl' command is capable of adding pmemory as "System RAM" after LPAR boot. The command to do so is: `daxctl reconfigure-device --mode=system-ram dax0.0 --force`. This dynamically adds the pmemory range to LPAR RAM, eventually invoking iommu_mem_notifier(). The address range of pmemory is far beyond the maximum RAM that the LPAR can have, which means this range is beyond the DDW created for the device at device initialization time. As a result, when iommu_mem_notifier() tries to pre-map TCEs for the pmemory range, the PHYP HCALL returns H_PARAMETER. This caused the daxctl command to fail to add pmemory as RAM. The solution is to not pre-map TCEs for pmemory. 
Signed-off-by: Gaurav Batra <gbatra@linux.ibm.com> Tested-by: Donet Tom <donettom@linux.ibm.com> Reviewed-by: Donet Tom <donettom@linux.ibm.com> Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Link: https://patch.msgid.link/20250130183854.92258-1-gbatra@linux.ibm.com Signed-off-by: Mamatha Inamdar <minamdar@redhat.com> Approved-by: Steve Best <sbest@redhat.com> Approved-by: Rafael Aquini <raquini@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Augusto Caringi <acaringi@redhat.com>
2 parents 8f24935 + f1506f7 commit b71e05a

File tree

3 files changed

+18
-14
lines changed

3 files changed

+18
-14
lines changed

arch/powerpc/include/asm/mmzone.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ extern cpumask_var_t node_to_cpumask_map[];
3535
#ifdef CONFIG_MEMORY_HOTPLUG
3636
extern unsigned long max_pfn;
3737
u64 memory_hotplug_max(void);
38+
u64 hot_add_drconf_memory_max(void);
3839
#else
3940
#define memory_hotplug_max() memblock_end_of_DRAM()
4041
#endif

arch/powerpc/mm/numa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1354,7 +1354,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
13541354
return nid;
13551355
}
13561356

1357-
static u64 hot_add_drconf_memory_max(void)
1357+
u64 hot_add_drconf_memory_max(void)
13581358
{
13591359
struct device_node *memory = NULL;
13601360
struct device_node *dn = NULL;

arch/powerpc/platforms/pseries/iommu.c

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,17 +1287,13 @@ static LIST_HEAD(failed_ddw_pdn_list);
12871287

12881288
static phys_addr_t ddw_memory_hotplug_max(void)
12891289
{
1290-
resource_size_t max_addr = memory_hotplug_max();
1291-
struct device_node *memory;
1290+
resource_size_t max_addr;
12921291

1293-
for_each_node_by_type(memory, "memory") {
1294-
struct resource res;
1295-
1296-
if (of_address_to_resource(memory, 0, &res))
1297-
continue;
1298-
1299-
max_addr = max_t(resource_size_t, max_addr, res.end + 1);
1300-
}
1292+
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1293+
max_addr = hot_add_drconf_memory_max();
1294+
#else
1295+
max_addr = memblock_end_of_DRAM();
1296+
#endif
13011297

13021298
return max_addr;
13031299
}
@@ -1603,7 +1599,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
16031599

16041600
if (direct_mapping) {
16051601
/* DDW maps the whole partition, so enable direct DMA mapping */
1606-
ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
1602+
ret = walk_system_ram_range(0, ddw_memory_hotplug_max() >> PAGE_SHIFT,
16071603
win64->value, tce_setrange_multi_pSeriesLP_walk);
16081604
if (ret) {
16091605
dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
@@ -2352,11 +2348,17 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
23522348
struct memory_notify *arg = data;
23532349
int ret = 0;
23542350

2351+
/* This notifier can get called when onlining persistent memory as well.
2352+
* TCEs are not pre-mapped for persistent memory. Persistent memory will
2353+
* always be above ddw_memory_hotplug_max()
2354+
*/
2355+
23552356
switch (action) {
23562357
case MEM_GOING_ONLINE:
23572358
spin_lock(&dma_win_list_lock);
23582359
list_for_each_entry(window, &dma_win_list, list) {
2359-
if (window->direct) {
2360+
if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
2361+
ddw_memory_hotplug_max()) {
23602362
ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
23612363
arg->nr_pages, window->prop);
23622364
}
@@ -2368,7 +2370,8 @@ static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
23682370
case MEM_OFFLINE:
23692371
spin_lock(&dma_win_list_lock);
23702372
list_for_each_entry(window, &dma_win_list, list) {
2371-
if (window->direct) {
2373+
if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
2374+
ddw_memory_hotplug_max()) {
23722375
ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
23732376
arg->nr_pages, window->prop);
23742377
}

0 commit comments

Comments
 (0)