diff options
author | Gaurav Batra <gbatra@linux.ibm.com> | 2024-05-13 20:46:08 -0500 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2024-06-04 17:13:53 +1000 |
commit | ff5163bb7000a0254ffdd7b50cb6df43add94f33 (patch) | |
tree | c6d481ee879112fa5d8b20a238239e78dc78359d /arch/powerpc/platforms | |
parent | 214f33fcf656bf1be3f9f03d58fda067cdf7eecc (diff) | |
download | linux-ff5163bb7000a0254ffdd7b50cb6df43add94f33.tar.gz linux-ff5163bb7000a0254ffdd7b50cb6df43add94f33.tar.bz2 linux-ff5163bb7000a0254ffdd7b50cb6df43add94f33.zip |
powerpc/pseries/iommu: Split Dynamic DMA Window to be used in Hybrid mode
Dynamic DMA Window (DDW) supports TCEs that are backed by 2MB page
size. In most configurations, DDW is big enough to pre-map all of LPAR
memory for IO. Pre-mapping of memory for DMA results in improvements in
IO performance.
Persistent memory, vPMEM, can be assigned to an LPAR as well. vPMEM is
not contiguous with LPAR memory and usually is assigned at high memory
addresses. This makes is not possible to pre-map both vPMEM and LPAR
memory in the same DDW.
For a dedicated adapter this limitation is not an issue. Dedicated
adapters can have both Default DMA window, which is backed by 4K page
size and a DDW backed by 2MB page size TCEs. In this scenario, LPAR
memory is pre-mapped in the DDW. Any DMA going to the vPMEM is routed
via dynamically allocated TCEs in the default window.
The issue arises with SR-IOV adapters. There is only one DMA window -
either Default or DDW. If an LPAR has vPMEM assigned, memory is not
pre-mapped in the DDW since TCEs needs to be allocated for vPMEM as well.
In this case, DDW is created and TCEs are dynamically allocated for both
vPMEM and LPAR memory.
Today, DDW is only used in single mode - direct mapped TCEs or
dynamically mapped TCEs. This enhancement breaks a single DDW in 2
regions -
1. First region to pre-map LPAR memory
2. Second region to dynamically allocate TCEs for IO to vPMEM
The DDW is split only if it is big enough to pre-map complete LPAR
memory and still have some space left to dynamically map vPMEM. Maximum
size possible DDW is created as permitted by the Hypervisor.
Signed-off-by: Gaurav Batra <gbatra@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240514014608.35537-1-gbatra@linux.ibm.com
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- | arch/powerpc/platforms/pseries/iommu.c | 69 |
1 files changed, 52 insertions, 17 deletions
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index b1e6d275cda9..bbc87e53db56 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1304,7 +1304,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct ddw_query_response query; struct ddw_create_response create; int page_shift; - u64 win_addr; + u64 win_addr, dynamic_offset = 0; const char *win_name; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; @@ -1312,6 +1312,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct property *win64; struct failed_ddw_pdn *fpdn; bool default_win_removed = false, direct_mapping = false; + bool dynamic_mapping = false; bool pmem_present; struct pci_dn *pci = PCI_DN(pdn); struct property *default_win = NULL; @@ -1407,7 +1408,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_failed; } - /* * The "ibm,pmemory" can appear anywhere in the address space. * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS @@ -1432,14 +1432,42 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) 1ULL << page_shift); len = order_base_2(query.largest_available_block << page_shift); - win_name = DMA64_PROPNAME; + + dynamic_mapping = true; } else { direct_mapping = !default_win_removed || (len == MAX_PHYSMEM_BITS) || (!pmem_present && (len == max_ram_len)); - win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME; + + /* DDW is big enough to direct map RAM. If there is vPMEM, check + * if enough space is left in DDW where we can dynamically + * allocate TCEs for vPMEM. For now, this Hybrid sharing of DDW + * is only for SR-IOV devices. + */ + if (default_win_removed && pmem_present && !direct_mapping) { + /* DDW is big enough to be split */ + if ((query.largest_available_block << page_shift) >= + MIN_DDW_VPMEM_DMA_WINDOW + (1ULL << max_ram_len)) { + direct_mapping = true; + + /* offset of the Dynamic part of DDW */ + dynamic_offset = 1ULL << max_ram_len; + } + + /* DDW will at least have dynamic allocation */ + dynamic_mapping = true; + + /* create max size DDW possible */ + len = order_base_2(query.largest_available_block + << page_shift); + } } + /* Even if the DDW is split into both direct mapped RAM and dynamically + * mapped vPMEM, the DDW property in OF will be marked as Direct. + */ + win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME; + ret = create_ddw(dev, ddw_avail, &create, page_shift, len); if (ret != 0) goto out_failed; @@ -1467,9 +1495,9 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (!window) goto out_del_prop; - if (direct_mapping) { - window->direct = true; + window->direct = direct_mapping; + if (direct_mapping) { /* DDW maps the whole partition, so enable direct DMA mapping */ ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, win64->value, tce_setrange_multi_pSeriesLP_walk); @@ -1481,12 +1509,13 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) clean_dma_window(pdn, win64->value); goto out_del_list; } - } else { + } + + if (dynamic_mapping) { struct iommu_table *newtbl; int i; unsigned long start = 0, end = 0; - - window->direct = false; + u64 dynamic_addr, dynamic_len; for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; @@ -1506,8 +1535,15 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_del_list; } - iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr, - 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); + /* If the DDW is split between directly mapped RAM and Dynamic + * mapped for TCES, offset into the DDW where the dynamic part + * begins. + */ + dynamic_addr = win_addr + dynamic_offset; + dynamic_len = (1UL << len) - dynamic_offset; + iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, + dynamic_addr, dynamic_len, page_shift, NULL, + &iommu_table_lpar_multi_ops); iommu_init_table(newtbl, pci->phb->node, start, end); pci->table_group->tables[1] = newtbl; @@ -1559,13 +1595,12 @@ out_failed: out_unlock: mutex_unlock(&dma_win_init_mutex); - /* - * If we have persistent memory and the window size is only as big - * as RAM, then we failed to create a window to cover persistent - * memory and need to set the DMA limit. + /* If we have persistent memory and the window size is not big enough + * to directly map both RAM and vPMEM, then we need to set DMA limit. */ - if (pmem_present && direct_mapping && len == max_ram_len) - dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); + if (pmem_present && direct_mapping && len != MAX_PHYSMEM_BITS) + dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + + (1ULL << max_ram_len); return direct_mapping; } |