Merge branch 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu intoiommufd/for-next

Jason Gunthorpe says: ==================== iommufd follows the same design as KVM and uses memory cgroups to limit the amount of kernel memory a iommufd file descriptor can pin down. The various internal data structures already use GFP_KERNEL_ACCOUNT to charge its own memory. However, one of the biggest consumers of kernel memory is the IOPTEs stored under the iommu_domain and these allocations are not tracked. This series is the first step in fixing it. The iommu driver contract already includes a 'gfp' argument to the map_pages op, allowing iommufd to specify GFP_KERNEL_ACCOUNT and then having the driver allocate the IOPTE tables with that flag will capture a significant amount of the allocations. Update the iommu_map() API to pass in the GFP argument, and fix all call sites. Replace iommu_map_atomic(). Audit the "enterprise" iommu drivers to make sure they do the right thing. Intel and S390 ignore the GFP argument and always use GFP_ATOMIC. This is problematic for iommufd anyhow, so fix it. AMD and ARM SMMUv2/3 are already correct. A follow up series will be needed to capture the allocations made when the iommu_domain itself is allocated, which will complete the job. ==================== * 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu: iommu/s390: Use GFP_KERNEL in sleepable contexts iommu/s390: Push the gfp parameter to the kmem_cache_alloc()'s iommu/intel: Use GFP_KERNEL in sleepable contexts iommu/intel: Support the gfp argument to the map_pages op iommu/intel: Add a gfp parameter to alloc_pgtable_page() iommufd: Use GFP_KERNEL_ACCOUNT for iommu_map() iommu/dma: Use the gfp parameter in __iommu_dma_alloc_noncontiguous() iommu: Add a gfp parameter to iommu_map_sg() iommu: Remove iommu_map_atomic() iommu: Add a gfp parameter to iommu_map() Link: https://lore.kernel.org/linux-iommu/0-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
author: Jason Gunthorpe <jgg@nvidia.com> 2023-01-30 13:53:04 -0400
committer: Jason Gunthorpe <jgg@nvidia.com> 2023-01-30 13:54:35 -0400
commit: fd9f2a912255106d3b53163d816a4bd99ba29664 (patch)
tree: 892f38b617b534c93dc5c0b1f1efc3950812e828 /arch/s390
parent: 84798f2849942bb5e8817417adfdfa6241df2835 (diff)
parent: 429f27e36874e34727a1f8495be2ea3a7060732c (diff)
download: linux-stable-fd9f2a912255106d3b53163d816a4bd99ba29664.tar.gz
linux-stable-fd9f2a912255106d3b53163d816a4bd99ba29664.tar.bz2
linux-stable-fd9f2a912255106d3b53163d816a4bd99ba29664.zip
2 files changed, 20 insertions, 16 deletions
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 91e63426bdc5..7119c04c51c5 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -186,9 +186,10 @@ static inline unsigned long *get_st_pto(unsigned long entry)
 
 /* Prototypes */
 void dma_free_seg_table(unsigned long);
-unsigned long *dma_alloc_cpu_table(void);
+unsigned long *dma_alloc_cpu_table(gfp_t gfp);
 void dma_cleanup_tables(unsigned long *);
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
+				  gfp_t gfp);
 void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
 
 extern const struct dma_map_ops s390_pci_dma_ops;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index ea478d11fbd1..2d9b01d7ca4c 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -27,11 +27,11 @@ static int zpci_refresh_global(struct zpci_dev *zdev)
 				  zdev->iommu_pages * PAGE_SIZE);
 }
 
-unsigned long *dma_alloc_cpu_table(void)
+unsigned long *dma_alloc_cpu_table(gfp_t gfp)
 {
 	unsigned long *table, *entry;
 
-	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
+	table = kmem_cache_alloc(dma_region_table_cache, gfp);
 	if (!table)
 		return NULL;
 
@@ -45,11 +45,11 @@ static void dma_free_cpu_table(void *table)
 	kmem_cache_free(dma_region_table_cache, table);
 }
 
-static unsigned long *dma_alloc_page_table(void)
+static unsigned long *dma_alloc_page_table(gfp_t gfp)
 {
 	unsigned long *table, *entry;
 
-	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
+	table = kmem_cache_alloc(dma_page_table_cache, gfp);
 	if (!table)
 		return NULL;
 
@@ -63,7 +63,7 @@ static void dma_free_page_table(void *table)
 	kmem_cache_free(dma_page_table_cache, table);
 }
 
-static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
 {
 	unsigned long old_rte, rte;
 	unsigned long *sto;
@@ -72,7 +72,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
 	if (reg_entry_isvalid(rte)) {
 		sto = get_rt_sto(rte);
 	} else {
-		sto = dma_alloc_cpu_table();
+		sto = dma_alloc_cpu_table(gfp);
 		if (!sto)
 			return NULL;
 
@@ -90,7 +90,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
 	return sto;
 }
 
-static unsigned long *dma_get_page_table_origin(unsigned long *step)
+static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
 {
 	unsigned long old_ste, ste;
 	unsigned long *pto;
@@ -99,7 +99,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step)
 	if (reg_entry_isvalid(ste)) {
 		pto = get_st_pto(ste);
 	} else {
-		pto = dma_alloc_page_table();
+		pto = dma_alloc_page_table(gfp);
 		if (!pto)
 			return NULL;
 		set_st_pto(&ste, virt_to_phys(pto));
@@ -116,18 +116,19 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step)
 	return pto;
 }
 
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
+				  gfp_t gfp)
 {
 	unsigned long *sto, *pto;
 	unsigned int rtx, sx, px;
 
 	rtx = calc_rtx(dma_addr);
-	sto = dma_get_seg_table_origin(&rto[rtx]);
+	sto = dma_get_seg_table_origin(&rto[rtx], gfp);
 	if (!sto)
 		return NULL;
 
 	sx = calc_sx(dma_addr);
-	pto = dma_get_page_table_origin(&sto[sx]);
+	pto = dma_get_page_table_origin(&sto[sx], gfp);
 	if (!pto)
 		return NULL;
 
@@ -170,7 +171,8 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
 		return -EINVAL;
 
 	for (i = 0; i < nr_pages; i++) {
-		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
+					   GFP_ATOMIC);
 		if (!entry) {
 			rc = -ENOMEM;
 			goto undo_cpu_trans;
@@ -186,7 +188,8 @@ undo_cpu_trans:
 		while (i-- > 0) {
 			page_addr -= PAGE_SIZE;
 			dma_addr -= PAGE_SIZE;
-			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
+						   GFP_ATOMIC);
 			if (!entry)
 				break;
 			dma_update_cpu_trans(entry, page_addr, flags);
@@ -576,7 +579,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 
 	spin_lock_init(&zdev->iommu_bitmap_lock);
 
-	zdev->dma_table = dma_alloc_cpu_table();
+	zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
 	if (!zdev->dma_table) {
 		rc = -ENOMEM;
 		goto out;
author	Jason Gunthorpe <jgg@nvidia.com>	2023-01-30 13:53:04 -0400
committer	Jason Gunthorpe <jgg@nvidia.com>	2023-01-30 13:54:35 -0400
commit	fd9f2a912255106d3b53163d816a4bd99ba29664 (patch)
tree	892f38b617b534c93dc5c0b1f1efc3950812e828 /arch/s390
parent	84798f2849942bb5e8817417adfdfa6241df2835 (diff)
parent	429f27e36874e34727a1f8495be2ea3a7060732c (diff)
download	linux-stable-fd9f2a912255106d3b53163d816a4bd99ba29664.tar.gz linux-stable-fd9f2a912255106d3b53163d816a4bd99ba29664.tar.bz2 linux-stable-fd9f2a912255106d3b53163d816a4bd99ba29664.zip