diff options
author | Jason Gunthorpe <jgg@nvidia.com> | 2023-01-30 13:53:04 -0400 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2023-01-30 13:54:35 -0400 |
commit | fd9f2a912255106d3b53163d816a4bd99ba29664 (patch) | |
tree | 892f38b617b534c93dc5c0b1f1efc3950812e828 /arch/s390 | |
parent | 84798f2849942bb5e8817417adfdfa6241df2835 (diff) | |
parent | 429f27e36874e34727a1f8495be2ea3a7060732c (diff) | |
download | linux-stable-fd9f2a912255106d3b53163d816a4bd99ba29664.tar.gz linux-stable-fd9f2a912255106d3b53163d816a4bd99ba29664.tar.bz2 linux-stable-fd9f2a912255106d3b53163d816a4bd99ba29664.zip |
Merge branch 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu intoiommufd/for-next
Jason Gunthorpe says:
====================
iommufd follows the same design as KVM and uses memory cgroups to limit
the amount of kernel memory a iommufd file descriptor can pin down. The
various internal data structures already use GFP_KERNEL_ACCOUNT to charge
its own memory.
However, one of the biggest consumers of kernel memory is the IOPTEs
stored under the iommu_domain and these allocations are not tracked.
This series is the first step in fixing it.
The iommu driver contract already includes a 'gfp' argument to the
map_pages op, allowing iommufd to specify GFP_KERNEL_ACCOUNT and then
having the driver allocate the IOPTE tables with that flag will capture a
significant amount of the allocations.
Update the iommu_map() API to pass in the GFP argument, and fix all call
sites. Replace iommu_map_atomic().
Audit the "enterprise" iommu drivers to make sure they do the right thing.
Intel and S390 ignore the GFP argument and always use GFP_ATOMIC. This is
problematic for iommufd anyhow, so fix it. AMD and ARM SMMUv2/3 are
already correct.
A follow up series will be needed to capture the allocations made when the
iommu_domain itself is allocated, which will complete the job.
====================
* 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
iommu/s390: Use GFP_KERNEL in sleepable contexts
iommu/s390: Push the gfp parameter to the kmem_cache_alloc()'s
iommu/intel: Use GFP_KERNEL in sleepable contexts
iommu/intel: Support the gfp argument to the map_pages op
iommu/intel: Add a gfp parameter to alloc_pgtable_page()
iommufd: Use GFP_KERNEL_ACCOUNT for iommu_map()
iommu/dma: Use the gfp parameter in __iommu_dma_alloc_noncontiguous()
iommu: Add a gfp parameter to iommu_map_sg()
iommu: Remove iommu_map_atomic()
iommu: Add a gfp parameter to iommu_map()
Link: https://lore.kernel.org/linux-iommu/0-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/include/asm/pci_dma.h | 5 | ||||
-rw-r--r-- | arch/s390/pci/pci_dma.c | 31 |
2 files changed, 20 insertions, 16 deletions
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 91e63426bdc5..7119c04c51c5 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -186,9 +186,10 @@ static inline unsigned long *get_st_pto(unsigned long entry) /* Prototypes */ void dma_free_seg_table(unsigned long); -unsigned long *dma_alloc_cpu_table(void); +unsigned long *dma_alloc_cpu_table(gfp_t gfp); void dma_cleanup_tables(unsigned long *); -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr); +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, + gfp_t gfp); void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags); extern const struct dma_map_ops s390_pci_dma_ops; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index ea478d11fbd1..2d9b01d7ca4c 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -27,11 +27,11 @@ static int zpci_refresh_global(struct zpci_dev *zdev) zdev->iommu_pages * PAGE_SIZE); } -unsigned long *dma_alloc_cpu_table(void) +unsigned long *dma_alloc_cpu_table(gfp_t gfp) { unsigned long *table, *entry; - table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); + table = kmem_cache_alloc(dma_region_table_cache, gfp); if (!table) return NULL; @@ -45,11 +45,11 @@ static void dma_free_cpu_table(void *table) kmem_cache_free(dma_region_table_cache, table); } -static unsigned long *dma_alloc_page_table(void) +static unsigned long *dma_alloc_page_table(gfp_t gfp) { unsigned long *table, *entry; - table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); + table = kmem_cache_alloc(dma_page_table_cache, gfp); if (!table) return NULL; @@ -63,7 +63,7 @@ static void dma_free_page_table(void *table) kmem_cache_free(dma_page_table_cache, table); } -static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) { unsigned long old_rte, rte; unsigned long *sto; @@ -72,7 +72,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) if (reg_entry_isvalid(rte)) { sto = get_rt_sto(rte); } else { - sto = dma_alloc_cpu_table(); + sto = dma_alloc_cpu_table(gfp); if (!sto) return NULL; @@ -90,7 +90,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) return sto; } -static unsigned long *dma_get_page_table_origin(unsigned long *step) +static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) { unsigned long old_ste, ste; unsigned long *pto; @@ -99,7 +99,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step) if (reg_entry_isvalid(ste)) { pto = get_st_pto(ste); } else { - pto = dma_alloc_page_table(); + pto = dma_alloc_page_table(gfp); if (!pto) return NULL; set_st_pto(&ste, virt_to_phys(pto)); @@ -116,18 +116,19 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step) return pto; } -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, + gfp_t gfp) { unsigned long *sto, *pto; unsigned int rtx, sx, px; rtx = calc_rtx(dma_addr); - sto = dma_get_seg_table_origin(&rto[rtx]); + sto = dma_get_seg_table_origin(&rto[rtx], gfp); if (!sto) return NULL; sx = calc_sx(dma_addr); - pto = dma_get_page_table_origin(&sto[sx]); + pto = dma_get_page_table_origin(&sto[sx], gfp); if (!pto) return NULL; @@ -170,7 +171,8 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, return -EINVAL; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, + GFP_ATOMIC); if (!entry) { rc = -ENOMEM; goto undo_cpu_trans; @@ -186,7 +188,8 @@ undo_cpu_trans: while (i-- > 0) { page_addr -= PAGE_SIZE; dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, + GFP_ATOMIC); if (!entry) break; dma_update_cpu_trans(entry, page_addr, flags); @@ -576,7 +579,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) spin_lock_init(&zdev->iommu_bitmap_lock); - zdev->dma_table = dma_alloc_cpu_table(); + zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL); if (!zdev->dma_table) { rc = -ENOMEM; goto out; |