diff options
author | Robin Murphy <robin.murphy@arm.com> | 2017-03-31 15:46:07 +0100 |
---|---|---|
committer | Joerg Roedel <jroedel@suse.de> | 2017-04-03 12:45:03 +0200 |
commit | bb65a64c7285e7105c1a6c8a33b37770343a4e96 (patch) | |
tree | 2efc271de7785fa95fa91c6385c6fa56a76ce7be /drivers/iommu/dma-iommu.c | |
parent | a44e6657585b15eeebf5681bfcc7ce0b002429c2 (diff) | |
download | linux-stable-bb65a64c7285e7105c1a6c8a33b37770343a4e96.tar.gz linux-stable-bb65a64c7285e7105c1a6c8a33b37770343a4e96.tar.bz2 linux-stable-bb65a64c7285e7105c1a6c8a33b37770343a4e96.zip |
iommu/dma: Plumb in the per-CPU IOVA caches
With IOVA allocation suitably tidied up, we are finally free to opt in
to the per-CPU caching mechanism. The caching alone can provide a modest
improvement over walking the rbtree for weedier systems (iperf3 shows
~10% more ethernet throughput on an ARM Juno r1 constrained to a single
650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree
lock contention which larger ARM-based systems with lots of parallel I/O
are starting to feel the pain of.
Reviewed-by: Nate Watterson <nwatters@codeaurora.org>
Tested-by: Nate Watterson <nwatters@codeaurora.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Diffstat (limited to 'drivers/iommu/dma-iommu.c')
-rw-r--r-- | drivers/iommu/dma-iommu.c | 37 |
1 files changed, 17 insertions, 20 deletions
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1b94beb43036..8348f366ddd1 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -361,8 +361,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; - unsigned long shift, iova_len; - struct iova *iova = NULL; + unsigned long shift, iova_len, iova = 0; if (cookie->type == IOMMU_DMA_MSI_COOKIE) { cookie->msi_iova += size; @@ -371,41 +370,39 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, shift = iova_shift(iovad); iova_len = size >> shift; + /* + * Freeing non-power-of-two-sized allocations back into the IOVA caches + * will come back to bite us badly, so we have to waste a bit of space + * rounding up anything cacheable to make sure that can't happen. The + * order of the unadjusted size will still match upon freeing. + */ + if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + iova_len = roundup_pow_of_two(iova_len); if (domain->geometry.force_aperture) dma_limit = min(dma_limit, domain->geometry.aperture_end); /* Try to get PCI devices a SAC address */ if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) - iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift, - true); - /* - * Enforce size-alignment to be safe - there could perhaps be an - * attribute to control this per-device, or at least per-domain... - */ + iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); + if (!iova) - iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true); + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); - return (dma_addr_t)iova->pfn_lo << shift; + return (dma_addr_t)iova << shift; } static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, dma_addr_t iova, size_t size) { struct iova_domain *iovad = &cookie->iovad; - struct iova *iova_rbnode; + unsigned long shift = iova_shift(iovad); /* The MSI case is only ever cleaning up its most recent allocation */ - if (cookie->type == IOMMU_DMA_MSI_COOKIE) { + if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; - return; - } - - iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova)); - if (WARN_ON(!iova_rbnode)) - return; - - __free_iova(iovad, iova_rbnode); + else + free_iova_fast(iovad, iova >> shift, size >> shift); } static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, |