diff options
Diffstat (limited to 'drivers')
40 files changed, 3154 insertions, 1764 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 2ab07ce17abb..aca76383f201 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -303,6 +303,15 @@ config ROCKCHIP_IOMMU Say Y here if you are using a Rockchip SoC that includes an IOMMU device. +config SUN50I_IOMMU + bool "Allwinner H6 IOMMU Support" + depends on ARCH_SUNXI || COMPILE_TEST + select ARM_DMA_USE_IOMMU + select IOMMU_API + select IOMMU_DMA + help + Support for the IOMMU introduced in the Allwinner H6 SoCs. + config TEGRA_IOMMU_GART bool "Tegra GART IOMMU Support" depends on ARCH_TEGRA_2x_SOC diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 9f33fdb3bb05..57cf4ba5e27c 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_MTK_IOMMU_V1) += mtk_iommu_v1.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o +obj-$(CONFIG_SUN50I_IOMMU) += sun50i-iommu.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2883ac389abb..311ef7105c6d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -22,7 +22,6 @@ #include <linux/dma-direct.h> #include <linux/dma-iommu.h> #include <linux/iommu-helper.h> -#include <linux/iommu.h> #include <linux/delay.h> #include <linux/amd-iommu.h> #include <linux/notifier.h> @@ -43,8 +42,7 @@ #include <asm/gart.h> #include <asm/dma.h> -#include "amd_iommu_proto.h" -#include "amd_iommu_types.h" +#include "amd_iommu.h" #include "irq_remapping.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -71,6 +69,8 @@ */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) +#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL + static DEFINE_SPINLOCK(pd_bitmap_lock); /* List of all available dev_data structures */ @@ -99,7 +99,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); -static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); static void update_and_flush_device_table(struct protection_domain *domain, struct domain_pgtable *pgtable); @@ -280,12 +279,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid) return dev_data; } -struct iommu_dev_data *get_dev_data(struct device *dev) -{ - return dev->archdata.iommu; -} -EXPORT_SYMBOL(get_dev_data); - /* * Find or create an IOMMU group for a acpihid device. */ @@ -314,16 +307,15 @@ static struct iommu_group *acpihid_device_group(struct device *dev) static bool pci_iommuv2_capable(struct pci_dev *pdev) { static const int caps[] = { - PCI_EXT_CAP_ID_ATS, PCI_EXT_CAP_ID_PRI, PCI_EXT_CAP_ID_PASID, }; int i, pos; - if (pci_ats_disabled()) + if (!pci_ats_supported(pdev)) return false; - for (i = 0; i < 3; ++i) { + for (i = 0; i < 2; ++i) { pos = pci_find_ext_capability(pdev, caps[i]); if (pos == 0) return false; @@ -336,7 +328,7 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) { struct iommu_dev_data *dev_data; - dev_data = get_dev_data(&pdev->dev); + dev_data = dev_iommu_priv_get(&pdev->dev); return dev_data->errata & (1 << erratum) ? true : false; } @@ -349,7 +341,7 @@ static bool check_device(struct device *dev) { int devid; - if (!dev || !dev->dma_mask) + if (!dev) return false; devid = get_device_id(dev); @@ -366,32 +358,18 @@ static bool check_device(struct device *dev) return true; } -static void init_iommu_group(struct device *dev) -{ - struct iommu_group *group; - - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return; - - iommu_group_put(group); -} - static int iommu_init_device(struct device *dev) { struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; int devid; - if (dev->archdata.iommu) + if (dev_iommu_priv_get(dev)) return 0; devid = get_device_id(dev); if (devid < 0) return devid; - iommu = amd_iommu_rlookup_table[devid]; - dev_data = find_dev_data(devid); if (!dev_data) return -ENOMEM; @@ -412,9 +390,7 @@ static int iommu_init_device(struct device *dev) dev_data->iommu_v2 = iommu->is_iommu_v2; } - dev->archdata.iommu = dev_data; - - iommu_device_link(&iommu->iommu, dev); + dev_iommu_priv_set(dev, dev_data); return 0; } @@ -433,31 +409,18 @@ static void iommu_ignore_device(struct device *dev) setup_aliases(dev); } -static void iommu_uninit_device(struct device *dev) +static void amd_iommu_uninit_device(struct device *dev) { struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; - int devid; - devid = get_device_id(dev); - if (devid < 0) - return; - - iommu = amd_iommu_rlookup_table[devid]; - - dev_data = search_dev_data(devid); + dev_data = dev_iommu_priv_get(dev); if (!dev_data) return; if (dev_data->domain) detach_device(dev); - iommu_device_unlink(&iommu->iommu, dev); - - iommu_group_remove_device(dev); - - /* Remove dma-ops */ - dev->dma_ops = NULL; + dev_iommu_priv_set(dev, NULL); /* * We keep dev_data around for unplugged devices and reuse it when the @@ -521,7 +484,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) - dev_data = get_dev_data(&pdev->dev); + dev_data = dev_iommu_priv_get(&pdev->dev); if (dev_data && __ratelimit(&dev_data->rs)) { pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", @@ -1418,20 +1381,19 @@ static struct page *free_sub_pt(unsigned long root, int mode, return freelist; } -static void free_pagetable(struct protection_domain *domain) +static void free_pagetable(struct domain_pgtable *pgtable) { - struct domain_pgtable pgtable; struct page *freelist = NULL; unsigned long root; - amd_iommu_domain_get_pgtable(domain, &pgtable); - atomic64_set(&domain->pt_root, 0); + if (pgtable->mode == PAGE_MODE_NONE) + return; - BUG_ON(pgtable.mode < PAGE_MODE_NONE || - pgtable.mode > PAGE_MODE_6_LEVEL); + BUG_ON(pgtable->mode < PAGE_MODE_NONE || + pgtable->mode > PAGE_MODE_6_LEVEL); - root = (unsigned long)pgtable.root; - freelist = free_sub_pt(root, pgtable.mode, freelist); + root = (unsigned long)pgtable->root; + freelist = free_sub_pt(root, pgtable->mode, freelist); free_page_list(freelist); } @@ -1844,70 +1806,6 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } -/* - * Free a domain, only used if something went wrong in the - * allocation path and we need to free an already allocated page table - */ -static void dma_ops_domain_free(struct protection_domain *domain) -{ - if (!domain) - return; - - iommu_put_dma_cookie(&domain->domain); - - free_pagetable(domain); - - if (domain->id) - domain_id_free(domain->id); - - kfree(domain); -} - -/* - * Allocates a new protection domain usable for the dma_ops functions. - * It also initializes the page table and the address allocator data - * structures required for the dma_ops interface - */ -static struct protection_domain *dma_ops_domain_alloc(void) -{ - struct protection_domain *domain; - u64 *pt_root, root; - - domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL); - if (!domain) - return NULL; - - if (protection_domain_init(domain)) - goto free_domain; - - pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pt_root) - goto free_domain; - - root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL); - atomic64_set(&domain->pt_root, root); - domain->flags = PD_DMA_OPS_MASK; - - if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM) - goto free_domain; - - return domain; - -free_domain: - dma_ops_domain_free(domain); - - return NULL; -} - -/* - * little helper function to check whether a given protection domain is a - * dma_ops domain - */ -static bool dma_ops_domain(struct protection_domain *domain) -{ - return domain->flags & PD_DMA_OPS_MASK; -} - static void set_dte_entry(u16 devid, struct protection_domain *domain, struct domain_pgtable *pgtable, bool ats, bool ppr) @@ -2119,14 +2017,14 @@ out_err: static int attach_device(struct device *dev, struct protection_domain *domain) { - struct pci_dev *pdev; struct iommu_dev_data *dev_data; + struct pci_dev *pdev; unsigned long flags; int ret; spin_lock_irqsave(&domain->lock, flags); - dev_data = get_dev_data(dev); + dev_data = dev_iommu_priv_get(dev); spin_lock(&dev_data->lock); @@ -2139,8 +2037,10 @@ static int attach_device(struct device *dev, pdev = to_pci_dev(dev); if (domain->flags & PD_IOMMUV2_MASK) { + struct iommu_domain *def_domain = iommu_get_dma_domain(dev); + ret = -EINVAL; - if (!dev_data->passthrough) + if (def_domain->type != IOMMU_DOMAIN_IDENTITY) goto out; if (dev_data->iommu_v2) { @@ -2188,7 +2088,7 @@ static void detach_device(struct device *dev) struct iommu_dev_data *dev_data; unsigned long flags; - dev_data = get_dev_data(dev); + dev_data = dev_iommu_priv_get(dev); domain = dev_data->domain; spin_lock_irqsave(&domain->lock, flags); @@ -2222,68 +2122,60 @@ out: spin_unlock_irqrestore(&domain->lock, flags); } -static int amd_iommu_add_device(struct device *dev) +static struct iommu_device *amd_iommu_probe_device(struct device *dev) { - struct iommu_dev_data *dev_data; - struct iommu_domain *domain; + struct iommu_device *iommu_dev; struct amd_iommu *iommu; int ret, devid; - if (!check_device(dev) || get_dev_data(dev)) - return 0; + if (!check_device(dev)) + return ERR_PTR(-ENODEV); devid = get_device_id(dev); if (devid < 0) - return devid; + return ERR_PTR(devid); iommu = amd_iommu_rlookup_table[devid]; + if (dev_iommu_priv_get(dev)) + return &iommu->iommu; + ret = iommu_init_device(dev); if (ret) { if (ret != -ENOTSUPP) dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); - + iommu_dev = ERR_PTR(ret); iommu_ignore_device(dev); - dev->dma_ops = NULL; - goto out; + } else { + iommu_dev = &iommu->iommu; } - init_iommu_group(dev); - dev_data = get_dev_data(dev); + iommu_completion_wait(iommu); - BUG_ON(!dev_data); + return iommu_dev; +} - if (dev_data->iommu_v2) - iommu_request_dm_for_dev(dev); +static void amd_iommu_probe_finalize(struct device *dev) +{ + struct iommu_domain *domain; /* Domains are initialized for this device - have a look what we ended up with */ domain = iommu_get_domain_for_dev(dev); - if (domain->type == IOMMU_DOMAIN_IDENTITY) - dev_data->passthrough = true; - else if (domain->type == IOMMU_DOMAIN_DMA) + if (domain->type == IOMMU_DOMAIN_DMA) iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0); - -out: - iommu_completion_wait(iommu); - - return 0; } -static void amd_iommu_remove_device(struct device *dev) +static void amd_iommu_release_device(struct device *dev) { + int devid = get_device_id(dev); struct amd_iommu *iommu; - int devid; if (!check_device(dev)) return; - devid = get_device_id(dev); - if (devid < 0) - return; - iommu = amd_iommu_rlookup_table[devid]; - iommu_uninit_device(dev); + amd_iommu_uninit_device(dev); iommu_completion_wait(iommu); } @@ -2418,27 +2310,46 @@ static void cleanup_domain(struct protection_domain *domain) static void protection_domain_free(struct protection_domain *domain) { + struct domain_pgtable pgtable; + if (!domain) return; if (domain->id) domain_id_free(domain->id); + amd_iommu_domain_get_pgtable(domain, &pgtable); + atomic64_set(&domain->pt_root, 0); + free_pagetable(&pgtable); + kfree(domain); } -static int protection_domain_init(struct protection_domain *domain) +static int protection_domain_init(struct protection_domain *domain, int mode) { + u64 *pt_root = NULL, root; + + BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL); + spin_lock_init(&domain->lock); domain->id = domain_id_alloc(); if (!domain->id) return -ENOMEM; INIT_LIST_HEAD(&domain->dev_list); + if (mode != PAGE_MODE_NONE) { + pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!pt_root) + return -ENOMEM; + } + + root = amd_iommu_domain_encode_pgtable(pt_root, mode); + atomic64_set(&domain->pt_root, root); + return 0; } -static struct protection_domain *protection_domain_alloc(void) +static struct protection_domain *protection_domain_alloc(int mode) { struct protection_domain *domain; @@ -2446,7 +2357,7 @@ static struct protection_domain *protection_domain_alloc(void) if (!domain) return NULL; - if (protection_domain_init(domain)) + if (protection_domain_init(domain, mode)) goto out_err; return domain; @@ -2459,54 +2370,35 @@ out_err: static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { - struct protection_domain *pdomain; - u64 *pt_root, root; + struct protection_domain *domain; + int mode = DEFAULT_PGTABLE_LEVEL; - switch (type) { - case IOMMU_DOMAIN_UNMANAGED: - pdomain = protection_domain_alloc(); - if (!pdomain) - return NULL; + if (type == IOMMU_DOMAIN_IDENTITY) + mode = PAGE_MODE_NONE; - pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pt_root) { - protection_domain_free(pdomain); - return NULL; - } + domain = protection_domain_alloc(mode); + if (!domain) + return NULL; - root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL); - atomic64_set(&pdomain->pt_root, root); + domain->domain.geometry.aperture_start = 0; + domain->domain.geometry.aperture_end = ~0ULL; + domain->domain.geometry.force_aperture = true; - pdomain->domain.geometry.aperture_start = 0; - pdomain->domain.geometry.aperture_end = ~0ULL; - pdomain->domain.geometry.force_aperture = true; + if (type == IOMMU_DOMAIN_DMA && + iommu_get_dma_cookie(&domain->domain) == -ENOMEM) + goto free_domain; - break; - case IOMMU_DOMAIN_DMA: - pdomain = dma_ops_domain_alloc(); - if (!pdomain) { - pr_err("Failed to allocate\n"); - return NULL; - } - break; - case IOMMU_DOMAIN_IDENTITY: - pdomain = protection_domain_alloc(); - if (!pdomain) - return NULL; + return &domain->domain; - atomic64_set(&pdomain->pt_root, PAGE_MODE_NONE); - break; - default: - return NULL; - } +free_domain: + protection_domain_free(domain); - return &pdomain->domain; + return NULL; } static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; - struct domain_pgtable pgtable; domain = to_pdomain(dom); @@ -2518,29 +2410,19 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) if (!dom) return; - switch (dom->type) { - case IOMMU_DOMAIN_DMA: - /* Now release the domain */ - dma_ops_domain_free(domain); - break; - default: - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (pgtable.mode != PAGE_MODE_NONE) - free_pagetable(domain); + if (dom->type == IOMMU_DOMAIN_DMA) + iommu_put_dma_cookie(&domain->domain); - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); + if (domain->flags & PD_IOMMUV2_MASK) + free_gcr3_table(domain); - protection_domain_free(domain); - break; - } + protection_domain_free(domain); } static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { - struct iommu_dev_data *dev_data = dev->archdata.iommu; + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct amd_iommu *iommu; int devid; @@ -2578,7 +2460,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, if (!check_device(dev)) return -EINVAL; - dev_data = dev->archdata.iommu; + dev_data = dev_iommu_priv_get(dev); dev_data->defer_attach = false; iommu = amd_iommu_rlookup_table[dev_data->devid]; @@ -2734,12 +2616,14 @@ static void amd_iommu_get_resv_regions(struct device *dev, list_add_tail(®ion->list, head); } -static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, - struct device *dev) +bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev) { - struct iommu_dev_data *dev_data = dev->archdata.iommu; + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + return dev_data->defer_attach; } +EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred); static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) { @@ -2758,6 +2642,20 @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain, amd_iommu_flush_iotlb_all(domain); } +static int amd_iommu_def_domain_type(struct device *dev) +{ + struct iommu_dev_data *dev_data; + + dev_data = dev_iommu_priv_get(dev); + if (!dev_data) + return 0; + + if (dev_data->iommu_v2) + return IOMMU_DOMAIN_IDENTITY; + + return 0; +} + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -2767,8 +2665,9 @@ const struct iommu_ops amd_iommu_ops = { .map = amd_iommu_map, .unmap = amd_iommu_unmap, .iova_to_phys = amd_iommu_iova_to_phys, - .add_device = amd_iommu_add_device, - .remove_device = amd_iommu_remove_device, + .probe_device = amd_iommu_probe_device, + .release_device = amd_iommu_release_device, + .probe_finalize = amd_iommu_probe_finalize, .device_group = amd_iommu_device_group, .domain_get_attr = amd_iommu_domain_get_attr, .get_resv_regions = amd_iommu_get_resv_regions, @@ -2777,6 +2676,7 @@ const struct iommu_ops amd_iommu_ops = { .pgsize_bitmap = AMD_IOMMU_PGSIZES, .flush_iotlb_all = amd_iommu_flush_iotlb_all, .iotlb_sync = amd_iommu_iotlb_sync, + .def_domain_type = amd_iommu_def_domain_type, }; /***************************************************************************** @@ -2807,7 +2707,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) struct protection_domain *domain = to_pdomain(dom); struct domain_pgtable pgtable; unsigned long flags; - u64 pt_root; spin_lock_irqsave(&domain->lock, flags); @@ -2815,18 +2714,13 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) amd_iommu_domain_get_pgtable(domain, &pgtable); /* Update data structure */ - pt_root = amd_iommu_domain_encode_pgtable(NULL, PAGE_MODE_NONE); - atomic64_set(&domain->pt_root, pt_root); + atomic64_set(&domain->pt_root, 0); /* Make changes visible to IOMMUs */ update_domain(domain); - /* Restore old pgtable in domain->ptroot to free page-table */ - pt_root = amd_iommu_domain_encode_pgtable(pgtable.root, pgtable.mode); - atomic64_set(&domain->pt_root, pt_root); - /* Page-table is not visible to IOMMU anymore, so free it */ - free_pagetable(domain); + free_pagetable(&pgtable); spin_unlock_irqrestore(&domain->lock, flags); } @@ -3085,7 +2979,7 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, struct amd_iommu *iommu; struct iommu_cmd cmd; - dev_data = get_dev_data(&pdev->dev); + dev_data = dev_iommu_priv_get(&pdev->dev); iommu = amd_iommu_rlookup_table[dev_data->devid]; build_complete_ppr(&cmd, dev_data->devid, pasid, status, @@ -3098,23 +2992,27 @@ EXPORT_SYMBOL(amd_iommu_complete_ppr); struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev) { struct protection_domain *pdomain; - struct iommu_domain *io_domain; + struct iommu_dev_data *dev_data; struct device *dev = &pdev->dev; + struct iommu_domain *io_domain; if (!check_device(dev)) return NULL; - pdomain = get_dev_data(dev)->domain; - if (pdomain == NULL && get_dev_data(dev)->defer_attach) { - get_dev_data(dev)->defer_attach = false; - io_domain = iommu_get_domain_for_dev(dev); + dev_data = dev_iommu_priv_get(&pdev->dev); + pdomain = dev_data->domain; + io_domain = iommu_get_domain_for_dev(dev); + + if (pdomain == NULL && dev_data->defer_attach) { + dev_data->defer_attach = false; pdomain = to_pdomain(io_domain); attach_device(dev, pdomain); } + if (pdomain == NULL) return NULL; - if (!dma_ops_domain(pdomain)) + if (io_domain->type != IOMMU_DOMAIN_DMA) return NULL; /* Only return IOMMUv2 domains */ @@ -3132,7 +3030,7 @@ void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum) if (!amd_iommu_v2_supported()) return; - dev_data = get_dev_data(&pdev->dev); + dev_data = dev_iommu_priv_get(&pdev->dev); dev_data->errata |= (1 << erratum); } EXPORT_SYMBOL(amd_iommu_enable_device_erratum); @@ -3151,11 +3049,8 @@ int amd_iommu_device_info(struct pci_dev *pdev, memset(info, 0, sizeof(*info)); - if (!pci_ats_disabled()) { - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); - if (pos) - info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; - } + if (pci_ats_supported(pdev)) + info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); if (pos) diff --git a/drivers/iommu/amd_iommu.h b/drivers/iommu/amd_iommu.h index 12d540d9b59b..f892992c8744 100644 --- a/drivers/iommu/amd_iommu.h +++ b/drivers/iommu/amd_iommu.h @@ -1,9 +1,103 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <jroedel@suse.de> + */ #ifndef AMD_IOMMU_H #define AMD_IOMMU_H -int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line); +#include <linux/iommu.h> + +#include "amd_iommu_types.h" + +extern int amd_iommu_get_num_iommus(void); +extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_thread(int irq, void *data); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); +extern int amd_iommu_init_devices(void); +extern void amd_iommu_uninit_devices(void); +extern void amd_iommu_init_notifier(void); +extern int amd_iommu_init_api(void); + +#ifdef CONFIG_AMD_IOMMU_DEBUGFS +void amd_iommu_debugfs_setup(struct amd_iommu *iommu); +#else +static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {} +#endif + +/* Needed for interrupt remapping */ +extern int amd_iommu_prepare(void); +extern int amd_iommu_enable(void); +extern void amd_iommu_disable(void); +extern int amd_iommu_reenable(int); +extern int amd_iommu_enable_faulting(void); +extern int amd_iommu_guest_ir; + +/* IOMMUv2 specific functions */ +struct iommu_domain; + +extern bool amd_iommu_v2_supported(void); +extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb); +extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); +extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); +extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); +extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, + u64 address); +extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); +extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, + unsigned long cr3); +extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); +extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); + +#ifdef CONFIG_IRQ_REMAP +extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); +#else +static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) +{ + return 0; +} +#endif + +#define PPR_SUCCESS 0x0 +#define PPR_INVALID 0x1 +#define PPR_FAILURE 0xf + +extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, + int status, int tag); + +static inline bool is_rd890_iommu(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_ATI) && + (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); +} + +static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +{ + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return false; + + return !!(iommu->features & f); +} + +static inline u64 iommu_virt_to_phys(void *vaddr) +{ + return (u64)__sme_set(virt_to_phys(vaddr)); +} + +static inline void *iommu_phys_to_virt(unsigned long paddr) +{ + return phys_to_virt(__sme_clr(paddr)); +} + +extern bool translation_pre_enabled(struct amd_iommu *iommu); +extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev); +extern int __init add_special_device(u8 type, u8 id, u16 *devid, + bool cmd_line); #ifdef CONFIG_DMI void amd_iommu_apply_ivrs_quirks(void); diff --git a/drivers/iommu/amd_iommu_debugfs.c b/drivers/iommu/amd_iommu_debugfs.c index c6a5c737ef09..545372fcc72f 100644 --- a/drivers/iommu/amd_iommu_debugfs.c +++ b/drivers/iommu/amd_iommu_debugfs.c @@ -8,10 +8,9 @@ */ #include <linux/debugfs.h> -#include <linux/iommu.h> #include <linux/pci.h> -#include "amd_iommu_proto.h" -#include "amd_iommu_types.h" + +#include "amd_iommu.h" static struct dentry *amd_iommu_debugfs; static DEFINE_MUTEX(amd_iommu_debugfs_lock); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 5b81fd16f5fa..3faff7f80fd2 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -18,7 +18,6 @@ #include <linux/msi.h> #include <linux/amd-iommu.h> #include <linux/export.h> -#include <linux/iommu.h> #include <linux/kmemleak.h> #include <linux/mem_encrypt.h> #include <asm/pci-direct.h> @@ -32,9 +31,8 @@ #include <asm/irq_remapping.h> #include <linux/crash_dump.h> + #include "amd_iommu.h" -#include "amd_iommu_proto.h" -#include "amd_iommu_types.h" #include "irq_remapping.h" /* diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h deleted file mode 100644 index 92c2ba6468a0..000000000000 --- a/drivers/iommu/amd_iommu_proto.h +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel <jroedel@suse.de> - */ - -#ifndef _ASM_X86_AMD_IOMMU_PROTO_H -#define _ASM_X86_AMD_IOMMU_PROTO_H - -#include "amd_iommu_types.h" - -extern int amd_iommu_get_num_iommus(void); -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); -extern irqreturn_t amd_iommu_int_thread(int irq, void *data); -extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_apply_erratum_63(u16 devid); -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); -extern int amd_iommu_init_devices(void); -extern void amd_iommu_uninit_devices(void); -extern void amd_iommu_init_notifier(void); -extern int amd_iommu_init_api(void); - -#ifdef CONFIG_AMD_IOMMU_DEBUGFS -void amd_iommu_debugfs_setup(struct amd_iommu *iommu); -#else -static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {} -#endif - -/* Needed for interrupt remapping */ -extern int amd_iommu_prepare(void); -extern int amd_iommu_enable(void); -extern void amd_iommu_disable(void); -extern int amd_iommu_reenable(int); -extern int amd_iommu_enable_faulting(void); -extern int amd_iommu_guest_ir; - -/* IOMMUv2 specific functions */ -struct iommu_domain; - -extern bool amd_iommu_v2_supported(void); -extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb); -extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); -extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); -extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); -extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid, - u64 address); -extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid); -extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, - unsigned long cr3); -extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); -extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); - -#ifdef CONFIG_IRQ_REMAP -extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); -#else -static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) -{ - return 0; -} -#endif - -#define PPR_SUCCESS 0x0 -#define PPR_INVALID 0x1 -#define PPR_FAILURE 0xf - -extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, - int status, int tag); - -static inline bool is_rd890_iommu(struct pci_dev *pdev) -{ - return (pdev->vendor == PCI_VENDOR_ID_ATI) && - (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); -} - -static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) -{ - if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) - return false; - - return !!(iommu->features & f); -} - -static inline u64 iommu_virt_to_phys(void *vaddr) -{ - return (u64)__sme_set(virt_to_phys(vaddr)); -} - -static inline void *iommu_phys_to_virt(unsigned long paddr) -{ - return phys_to_virt(__sme_clr(paddr)); -} - -extern bool translation_pre_enabled(struct amd_iommu *iommu); -extern struct iommu_dev_data *get_dev_data(struct device *dev); -#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 7a8fdec138bd..30a5d412255a 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -395,10 +395,10 @@ #define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */ extern bool amd_iommu_dump; -#define DUMP_printk(format, arg...) \ - do { \ - if (amd_iommu_dump) \ - printk(KERN_INFO "AMD-Vi: " format, ## arg); \ +#define DUMP_printk(format, arg...) \ + do { \ + if (amd_iommu_dump) \ + pr_info("AMD-Vi: " format, ## arg); \ } while(0); /* global flag if IOMMUs cache non-present entries */ @@ -645,7 +645,6 @@ struct iommu_dev_data { struct pci_dev *pdev; u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ - bool passthrough; /* Device is identity mapped */ struct { bool enabled; int qdep; diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index d6d85debd01b..c8a7b6b39222 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -13,13 +13,11 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/sched/mm.h> -#include <linux/iommu.h> #include <linux/wait.h> #include <linux/pci.h> #include <linux/gfp.h> -#include "amd_iommu_types.h" -#include "amd_iommu_proto.h" +#include "amd_iommu.h" MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>"); @@ -517,13 +515,12 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) struct amd_iommu_fault *iommu_fault; struct pasid_state *pasid_state; struct device_state *dev_state; + struct pci_dev *pdev = NULL; unsigned long flags; struct fault *fault; bool finish; u16 tag, devid; int ret; - struct iommu_dev_data *dev_data; - struct pci_dev *pdev = NULL; iommu_fault = data; tag = iommu_fault->tag & 0x1ff; @@ -534,12 +531,11 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) devid & 0xff); if (!pdev) return -ENODEV; - dev_data = get_dev_data(&pdev->dev); - /* In kdump kernel pci dev is not initialized yet -> send INVALID */ ret = NOTIFY_DONE; - if (translation_pre_enabled(amd_iommu_rlookup_table[devid]) - && dev_data->defer_attach) { + + /* In kdump kernel pci dev is not initialized yet -> send INVALID */ + if (amd_iommu_is_attach_deferred(NULL, &pdev->dev)) { amd_iommu_complete_ppr(pdev, iommu_fault->pasid, PPR_INVALID, tag); goto out; diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index 74d97a886e93..c75b9d957b70 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -150,6 +150,8 @@ static const struct arm_smmu_impl arm_mmu500_impl = { struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) { + const struct device_node *np = smmu->dev->of_node; + /* * We will inevitably have to combine model-specific implementation * quirks with platform-specific integration quirks, but everything @@ -166,11 +168,11 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) break; } - if (of_property_read_bool(smmu->dev->of_node, - "calxeda,smmu-secure-config-access")) + if (of_property_read_bool(np, "calxeda,smmu-secure-config-access")) smmu->impl = &calxeda_impl; - if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm845-smmu-500")) + if (of_device_is_compatible(np, "qcom,sdm845-smmu-500") || + of_device_is_compatible(np, "qcom,sc7180-smmu-500")) return qcom_smmu_impl_init(smmu); return smmu; diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c index 24c071c1d8b0..cf01d0215a39 100644 --- a/drivers/iommu/arm-smmu-qcom.c +++ b/drivers/iommu/arm-smmu-qcom.c @@ -3,6 +3,7 @@ * Copyright (c) 2019, The Linux Foundation. All rights reserved. */ +#include <linux/of_device.h> #include <linux/qcom_scm.h> #include "arm-smmu.h" @@ -11,12 +12,29 @@ struct qcom_smmu { struct arm_smmu_device smmu; }; +static const struct of_device_id qcom_smmu_client_of_match[] = { + { .compatible = "qcom,adreno" }, + { .compatible = "qcom,mdp4" }, + { .compatible = "qcom,mdss" }, + { .compatible = "qcom,sc7180-mdss" }, + { .compatible = "qcom,sc7180-mss-pil" }, + { .compatible = "qcom,sdm845-mdss" }, + { .compatible = "qcom,sdm845-mss-pil" }, + { } +}; + +static int qcom_smmu_def_domain_type(struct device *dev) +{ + const struct of_device_id *match = + of_match_device(qcom_smmu_client_of_match, dev); + + return match ? IOMMU_DOMAIN_IDENTITY : 0; +} + static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) { int ret; - arm_mmu500_reset(smmu); - /* * To address performance degradation in non-real time clients, * such as USB and UFS, turn off wait-for-safe on sdm845 based boards, @@ -30,8 +48,21 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu) return ret; } +static int qcom_smmu500_reset(struct arm_smmu_device *smmu) +{ + const struct device_node *np = smmu->dev->of_node; + + arm_mmu500_reset(smmu); + + if (of_device_is_compatible(np, "qcom,sdm845-smmu-500")) + return qcom_sdm845_smmu500_reset(smmu); + + return 0; +} + static const struct arm_smmu_impl qcom_smmu_impl = { - .reset = qcom_sdm845_smmu500_reset, + .def_domain_type = qcom_smmu_def_domain_type, + .reset = qcom_smmu500_reset, }; struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 82508730feb7..f578677a5c41 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -171,6 +171,8 @@ #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc +#define ARM_SMMU_REG_SZ 0xe00 + /* Common MSI config fields */ #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) #define MSI_CFG2_SH GENMASK(5, 4) @@ -628,6 +630,7 @@ struct arm_smmu_strtab_cfg { struct arm_smmu_device { struct device *dev; void __iomem *base; + void __iomem *page1; #define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0) #define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1) @@ -664,7 +667,6 @@ struct arm_smmu_device { #define ARM_SMMU_MAX_ASIDS (1 << 16) unsigned int asid_bits; - DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS); #define ARM_SMMU_MAX_VMIDS (1 << 16) unsigned int vmid_bits; @@ -724,6 +726,8 @@ struct arm_smmu_option_prop { const char *prop; }; +static DEFINE_XARRAY_ALLOC1(asid_xa); + static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, @@ -733,9 +737,8 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset, struct arm_smmu_device *smmu) { - if ((offset > SZ_64K) && - (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)) - offset -= SZ_64K; + if (offset > SZ_64K) + return smmu->page1 + offset - SZ_64K; return smmu->base + offset; } @@ -1763,6 +1766,14 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) cdcfg->cdtab = NULL; } +static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) +{ + if (!cd->asid) + return; + + xa_erase(&asid_xa, cd->asid); +} + /* Stream table manipulation functions */ static void arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) @@ -2448,10 +2459,9 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - if (cfg->cdcfg.cdtab) { + if (cfg->cdcfg.cdtab) arm_smmu_free_cd_tables(smmu_domain); - arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid); - } + arm_smmu_free_asid(&cfg->cd); } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; if (cfg->vmid) @@ -2466,14 +2476,15 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { int ret; - int asid; + u32 asid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; - asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits); - if (asid < 0) - return asid; + ret = xa_alloc(&asid_xa, &asid, &cfg->cd, + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); + if (ret) + return ret; cfg->s1cdmax = master->ssid_bits; @@ -2506,7 +2517,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, out_free_cd_tables: arm_smmu_free_cd_tables(smmu_domain); out_free_asid: - arm_smmu_bitmap_free(smmu->asid_map, asid); + arm_smmu_free_asid(&cfg->cd); return ret; } @@ -2652,26 +2663,20 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) } } -#ifdef CONFIG_PCI_ATS static bool arm_smmu_ats_supported(struct arm_smmu_master *master) { - struct pci_dev *pdev; + struct device *dev = master->dev; struct arm_smmu_device *smmu = master->smmu; - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) || - !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled()) + if (!(smmu->features & ARM_SMMU_FEAT_ATS)) return false; - pdev = to_pci_dev(master->dev); - return !pdev->untrusted && pdev->ats_cap; -} -#else -static bool arm_smmu_ats_supported(struct arm_smmu_master *master) -{ - return false; + if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS)) + return false; + + return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); } -#endif static void arm_smmu_enable_ats(struct arm_smmu_master *master) { @@ -2914,27 +2919,26 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid) static struct iommu_ops arm_smmu_ops; -static int arm_smmu_add_device(struct device *dev) +static struct iommu_device *arm_smmu_probe_device(struct device *dev) { int i, ret; struct arm_smmu_device *smmu; struct arm_smmu_master *master; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct iommu_group *group; if (!fwspec || fwspec->ops != &arm_smmu_ops) - return -ENODEV; + return ERR_PTR(-ENODEV); if (WARN_ON_ONCE(dev_iommu_priv_get(dev))) - return -EBUSY; + return ERR_PTR(-EBUSY); smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); if (!smmu) - return -ENODEV; + return ERR_PTR(-ENODEV); master = kzalloc(sizeof(*master), GFP_KERNEL); if (!master) - return -ENOMEM; + return ERR_PTR(-ENOMEM); master->dev = dev; master->smmu = smmu; @@ -2975,43 +2979,24 @@ static int arm_smmu_add_device(struct device *dev) master->ssid_bits = min_t(u8, master->ssid_bits, CTXDESC_LINEAR_CDMAX); - ret = iommu_device_link(&smmu->iommu, dev); - if (ret) - goto err_disable_pasid; - - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto err_unlink; - } + return &smmu->iommu; - iommu_group_put(group); - return 0; - -err_unlink: - iommu_device_unlink(&smmu->iommu, dev); -err_disable_pasid: - arm_smmu_disable_pasid(master); err_free_master: kfree(master); dev_iommu_priv_set(dev, NULL); - return ret; + return ERR_PTR(ret); } -static void arm_smmu_remove_device(struct device *dev) +static void arm_smmu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_master *master; - struct arm_smmu_device *smmu; if (!fwspec || fwspec->ops != &arm_smmu_ops) return; master = dev_iommu_priv_get(dev); - smmu = master->smmu; arm_smmu_detach_dev(master); - iommu_group_remove_device(dev); - iommu_device_unlink(&smmu->iommu, dev); arm_smmu_disable_pasid(master); kfree(master); iommu_fwspec_free(dev); @@ -3138,8 +3123,8 @@ static struct iommu_ops arm_smmu_ops = { .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, - .add_device = arm_smmu_add_device, - .remove_device = arm_smmu_remove_device, + .probe_device = arm_smmu_probe_device, + .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, @@ -4021,6 +4006,18 @@ err_reset_pci_ops: __maybe_unused; return err; } +static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, + resource_size_t size) +{ + struct resource res = { + .flags = IORESOURCE_MEM, + .start = start, + .end = start + size - 1, + }; + + return devm_ioremap_resource(dev, &res); +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -4056,10 +4053,23 @@ static int arm_smmu_device_probe(struct platform_device *pdev) } ioaddr = res->start; - smmu->base = devm_ioremap_resource(dev, res); + /* + * Don't map the IMPLEMENTATION DEFINED regions, since they may contain + * the PMCG registers which are reserved by the PMU driver. + */ + smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ); if (IS_ERR(smmu->base)) return PTR_ERR(smmu->base); + if (arm_smmu_resource_size(smmu) > SZ_64K) { + smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K, + ARM_SMMU_REG_SZ); + if (IS_ERR(smmu->page1)) + return PTR_ERR(smmu->page1); + } else { + smmu->page1 = smmu->base; + } + /* Interrupt lines */ irq = platform_get_irq_byname_optional(pdev, "combined"); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index a6a5796e9c41..243bc4cb2705 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -220,7 +220,7 @@ static int arm_smmu_register_legacy_master(struct device *dev, * With the legacy DT binding in play, we have no guarantees about * probe order, but then we're also not doing default domains, so we can * delay setting bus ops until we're sure every possible SMMU is ready, - * and that way ensure that no add_device() calls get missed. + * and that way ensure that no probe_device() calls get missed. */ static int arm_smmu_legacy_bus_init(void) { @@ -1062,7 +1062,6 @@ static int arm_smmu_master_alloc_smes(struct device *dev) struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); struct arm_smmu_device *smmu = cfg->smmu; struct arm_smmu_smr *smrs = smmu->smrs; - struct iommu_group *group; int i, idx, ret; mutex_lock(&smmu->stream_map_mutex); @@ -1090,18 +1089,9 @@ static int arm_smmu_master_alloc_smes(struct device *dev) cfg->smendx[i] = (s16)idx; } - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto out_err; - } - iommu_group_put(group); - /* It worked! Now, poke the actual hardware */ - for_each_cfg_sme(cfg, fwspec, i, idx) { + for_each_cfg_sme(cfg, fwspec, i, idx) arm_smmu_write_sme(smmu, idx); - smmu->s2crs[idx].group = group; - } mutex_unlock(&smmu->stream_map_mutex); return 0; @@ -1172,7 +1162,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) /* * FIXME: The arch/arm DMA API code tries to attach devices to its own - * domains between of_xlate() and add_device() - we have no way to cope + * domains between of_xlate() and probe_device() - we have no way to cope * with that, so until ARM gets converted to rely on groups and default * domains, just say no (but more politely than by dereferencing NULL). * This should be at least a WARN_ON once that's sorted. @@ -1382,7 +1372,7 @@ struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) return dev ? dev_get_drvdata(dev) : NULL; } -static int arm_smmu_add_device(struct device *dev) +static struct iommu_device *arm_smmu_probe_device(struct device *dev) { struct arm_smmu_device *smmu = NULL; struct arm_smmu_master_cfg *cfg; @@ -1403,7 +1393,7 @@ static int arm_smmu_add_device(struct device *dev) } else if (fwspec && fwspec->ops == &arm_smmu_ops) { smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); } else { - return -ENODEV; + return ERR_PTR(-ENODEV); } ret = -EINVAL; @@ -1444,21 +1434,19 @@ static int arm_smmu_add_device(struct device *dev) if (ret) goto out_cfg_free; - iommu_device_link(&smmu->iommu, dev); - device_link_add(dev, smmu->dev, DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); - return 0; + return &smmu->iommu; out_cfg_free: kfree(cfg); out_free: iommu_fwspec_free(dev); - return ret; + return ERR_PTR(ret); } -static void arm_smmu_remove_device(struct device *dev) +static void arm_smmu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_master_cfg *cfg; @@ -1475,13 +1463,11 @@ static void arm_smmu_remove_device(struct device *dev) if (ret < 0) return; - iommu_device_unlink(&smmu->iommu, dev); arm_smmu_master_free_smes(cfg, fwspec); arm_smmu_rpm_put(smmu); dev_iommu_priv_set(dev, NULL); - iommu_group_remove_device(dev); kfree(cfg); iommu_fwspec_free(dev); } @@ -1512,6 +1498,11 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) else group = generic_device_group(dev); + /* Remember group for faster lookups */ + if (!IS_ERR(group)) + for_each_cfg_sme(cfg, fwspec, i, idx) + smmu->s2crs[idx].group = group; + return group; } @@ -1618,6 +1609,17 @@ static void arm_smmu_get_resv_regions(struct device *dev, iommu_dma_get_resv_regions(dev, head); } +static int arm_smmu_def_domain_type(struct device *dev) +{ + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); + const struct arm_smmu_impl *impl = cfg->smmu->impl; + + if (impl && impl->def_domain_type) + return impl->def_domain_type(dev); + + return 0; +} + static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -1628,14 +1630,15 @@ static struct iommu_ops arm_smmu_ops = { .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, - .add_device = arm_smmu_add_device, - .remove_device = arm_smmu_remove_device, + .probe_device = arm_smmu_probe_device, + .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, + .def_domain_type = arm_smmu_def_domain_type, .pgsize_bitmap = -1UL, /* Restricted during device attach */ }; @@ -2253,7 +2256,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) return -ENODEV; if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) - dev_err(&pdev->dev, "removing device with active domains!\n"); + dev_notice(&pdev->dev, "disabling translation\n"); arm_smmu_bus_init(NULL); iommu_device_unregister(&smmu->iommu); diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index 8d1cd54d82a6..d172c024be61 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -386,6 +386,7 @@ struct arm_smmu_impl { int (*init_context)(struct arm_smmu_domain *smmu_domain); void (*tlb_sync)(struct arm_smmu_device *smmu, int page, int sync, int status); + int (*def_domain_type)(struct device *dev); }; static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index f77dae7ba7d4..60a2970c37ff 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -963,6 +963,7 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) warn_invalid_dmar(phys_addr, " returns all ones"); goto unmap; } + iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG); /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), @@ -1156,12 +1157,11 @@ static inline void reclaim_free_desc(struct q_inval *qi) } } -static int qi_check_fault(struct intel_iommu *iommu, int index) +static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) { u32 fault; int head, tail; struct q_inval *qi = iommu->qi; - int wait_index = (index + 1) % QI_LENGTH; int shift = qi_shift(iommu); if (qi->desc_status[wait_index] == QI_ABORT) @@ -1224,17 +1224,21 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) } /* - * Submit the queued invalidation descriptor to the remapping - * hardware unit and wait for its completion. + * Function to submit invalidation descriptors of all types to the queued + * invalidation interface(QI). Multiple descriptors can be submitted at a + * time, a wait descriptor will be appended to each submission to ensure + * hardware has completed the invalidation before return. Wait descriptors + * can be part of the submission but it will not be polled for completion. */ -int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) +int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, + unsigned int count, unsigned long options) { - int rc; struct q_inval *qi = iommu->qi; - int offset, shift, length; struct qi_desc wait_desc; int wait_index, index; unsigned long flags; + int offset, shift; + int rc, i; if (!qi) return 0; @@ -1243,32 +1247,41 @@ restart: rc = 0; raw_spin_lock_irqsave(&qi->q_lock, flags); - while (qi->free_cnt < 3) { + /* + * Check if we have enough empty slots in the queue to submit, + * the calculation is based on: + * # of desc + 1 wait desc + 1 space between head and tail + */ + while (qi->free_cnt < count + 2) { raw_spin_unlock_irqrestore(&qi->q_lock, flags); cpu_relax(); raw_spin_lock_irqsave(&qi->q_lock, flags); } index = qi->free_head; - wait_index = (index + 1) % QI_LENGTH; + wait_index = (index + count) % QI_LENGTH; shift = qi_shift(iommu); - length = 1 << shift; - qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE; + for (i = 0; i < count; i++) { + offset = ((index + i) % QI_LENGTH) << shift; + memcpy(qi->desc + offset, &desc[i], 1 << shift); + qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE; + } + qi->desc_status[wait_index] = QI_IN_USE; - offset = index << shift; - memcpy(qi->desc + offset, desc, length); wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; + if (options & QI_OPT_WAIT_DRAIN) + wait_desc.qw0 |= QI_IWD_PRQ_DRAIN; wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]); wait_desc.qw2 = 0; wait_desc.qw3 = 0; offset = wait_index << shift; - memcpy(qi->desc + offset, &wait_desc, length); + memcpy(qi->desc + offset, &wait_desc, 1 << shift); - qi->free_head = (qi->free_head + 2) % QI_LENGTH; - qi->free_cnt -= 2; + qi->free_head = (qi->free_head + count + 1) % QI_LENGTH; + qi->free_cnt -= count + 1; /* * update the HW tail register indicating the presence of @@ -1284,7 +1297,7 @@ restart: * a deadlock where the interrupt context can wait indefinitely * for free slots in the queue. */ - rc = qi_check_fault(iommu, index); + rc = qi_check_fault(iommu, index, wait_index); if (rc) break; @@ -1293,7 +1306,8 @@ restart: raw_spin_lock(&qi->q_lock); } - qi->desc_status[index] = QI_DONE; + for (i = 0; i < count; i++) + qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE; reclaim_free_desc(qi); raw_spin_unlock_irqrestore(&qi->q_lock, flags); @@ -1317,7 +1331,7 @@ void qi_global_iec(struct intel_iommu *iommu) desc.qw3 = 0; /* should never fail */ - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, @@ -1331,7 +1345,7 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, @@ -1355,7 +1369,7 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, @@ -1377,7 +1391,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } /* PASID-based IOTLB invalidation */ @@ -1418,7 +1432,46 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, QI_EIOTLB_AM(mask); } - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); +} + +/* PASID-based device IOTLB Invalidate */ +void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, + u32 pasid, u16 qdep, u64 addr, + unsigned int size_order, u64 granu) +{ + unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); + struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; + + desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | + QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | + QI_DEV_IOTLB_PFSID(pfsid); + desc.qw1 = QI_DEV_EIOTLB_GLOB(granu); + + /* + * If S bit is 0, we only flush a single page. If S bit is set, + * The least significant zero bit indicates the invalidation address + * range. VT-d spec 6.5.2.6. + * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. + * size order = 0 is PAGE_SIZE 4KB + * Max Invs Pending (MIP) is set to 0 for now until we have DIT in + * ECAP. + */ + desc.qw1 |= addr & ~mask; + if (size_order) + desc.qw1 |= QI_DEV_EIOTLB_SIZE; + + qi_submit_sync(iommu, &desc, 1, 0); +} + +void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, + u64 granu, int pasid) +{ + struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; + + desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) | + QI_PC_GRAN(granu) | QI_PC_TYPE; + qi_submit_sync(iommu, &desc, 1, 0); } /* diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 186ff5cc975c..60c8a56e4a3f 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1235,19 +1235,13 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *iommu_domain, return phys; } -static int exynos_iommu_add_device(struct device *dev) +static struct iommu_device *exynos_iommu_probe_device(struct device *dev) { struct exynos_iommu_owner *owner = dev->archdata.iommu; struct sysmmu_drvdata *data; - struct iommu_group *group; if (!has_sysmmu(dev)) - return -ENODEV; - - group = iommu_group_get_for_dev(dev); - - if (IS_ERR(group)) - return PTR_ERR(group); + return ERR_PTR(-ENODEV); list_for_each_entry(data, &owner->controllers, owner_node) { /* @@ -1259,12 +1253,15 @@ static int exynos_iommu_add_device(struct device *dev) DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); } - iommu_group_put(group); - return 0; + /* There is always at least one entry, see exynos_iommu_of_xlate() */ + data = list_first_entry(&owner->controllers, + struct sysmmu_drvdata, owner_node); + + return &data->iommu; } -static void exynos_iommu_remove_device(struct device *dev) +static void exynos_iommu_release_device(struct device *dev) { struct exynos_iommu_owner *owner = dev->archdata.iommu; struct sysmmu_drvdata *data; @@ -1282,7 +1279,6 @@ static void exynos_iommu_remove_device(struct device *dev) iommu_group_put(group); } } - iommu_group_remove_device(dev); list_for_each_entry(data, &owner->controllers, owner_node) device_link_del(data->link); @@ -1331,8 +1327,8 @@ static const struct iommu_ops exynos_iommu_ops = { .unmap = exynos_iommu_unmap, .iova_to_phys = exynos_iommu_iova_to_phys, .device_group = generic_device_group, - .add_device = exynos_iommu_add_device, - .remove_device = exynos_iommu_remove_device, + .probe_device = exynos_iommu_probe_device, + .release_device = exynos_iommu_release_device, .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, .of_xlate = exynos_iommu_of_xlate, }; diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 06828e2698d5..928d37771ece 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -1016,25 +1016,13 @@ static struct iommu_group *fsl_pamu_device_group(struct device *dev) return group; } -static int fsl_pamu_add_device(struct device *dev) +static struct iommu_device *fsl_pamu_probe_device(struct device *dev) { - struct iommu_group *group; - - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); - - iommu_group_put(group); - - iommu_device_link(&pamu_iommu, dev); - - return 0; + return &pamu_iommu; } -static void fsl_pamu_remove_device(struct device *dev) +static void fsl_pamu_release_device(struct device *dev) { - iommu_device_unlink(&pamu_iommu, dev); - iommu_group_remove_device(dev); } static const struct iommu_ops fsl_pamu_ops = { @@ -1048,8 +1036,8 @@ static const struct iommu_ops fsl_pamu_ops = { .iova_to_phys = fsl_pamu_iova_to_phys, .domain_set_attr = fsl_pamu_set_domain_attr, .domain_get_attr = fsl_pamu_get_domain_attr, - .add_device = fsl_pamu_add_device, - .remove_device = fsl_pamu_remove_device, + .probe_device = fsl_pamu_probe_device, + .release_device = fsl_pamu_release_device, .device_group = fsl_pamu_device_group, }; diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index a386b83e0e34..3c0c67a99c7b 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -131,7 +131,7 @@ static int hyperv_irq_remapping_activate(struct irq_domain *domain, return 0; } -static struct irq_domain_ops hyperv_ir_domain_ops = { +static const struct irq_domain_ops hyperv_ir_domain_ops = { .alloc = hyperv_irq_remapping_alloc, .free = hyperv_irq_remapping_free, .activate = hyperv_irq_remapping_activate, diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 3eb1fe240fb0..cf1ebb98e418 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -372,6 +372,66 @@ static int domain_translation_struct_show(struct seq_file *m, void *unused) } DEFINE_SHOW_ATTRIBUTE(domain_translation_struct); +static void invalidation_queue_entry_show(struct seq_file *m, + struct intel_iommu *iommu) +{ + int index, shift = qi_shift(iommu); + struct qi_desc *desc; + int offset; + + if (ecap_smts(iommu->ecap)) + seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tqw2\t\t\tqw3\t\t\tstatus\n"); + else + seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tstatus\n"); + + for (index = 0; index < QI_LENGTH; index++) { + offset = index << shift; + desc = iommu->qi->desc + offset; + if (ecap_smts(iommu->ecap)) + seq_printf(m, "%5d\t%016llx\t%016llx\t%016llx\t%016llx\t%016x\n", + index, desc->qw0, desc->qw1, + desc->qw2, desc->qw3, + iommu->qi->desc_status[index]); + else + seq_printf(m, "%5d\t%016llx\t%016llx\t%016x\n", + index, desc->qw0, desc->qw1, + iommu->qi->desc_status[index]); + } +} + +static int invalidation_queue_show(struct seq_file *m, void *unused) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + unsigned long flags; + struct q_inval *qi; + int shift; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + qi = iommu->qi; + shift = qi_shift(iommu); + + if (!qi || !ecap_qis(iommu->ecap)) + continue; + + seq_printf(m, "Invalidation queue on IOMMU: %s\n", iommu->name); + + raw_spin_lock_irqsave(&qi->q_lock, flags); + seq_printf(m, " Base: 0x%llx\tHead: %lld\tTail: %lld\n", + (u64)virt_to_phys(qi->desc), + dmar_readq(iommu->reg + DMAR_IQH_REG) >> shift, + dmar_readq(iommu->reg + DMAR_IQT_REG) >> shift); + invalidation_queue_entry_show(m, iommu); + raw_spin_unlock_irqrestore(&qi->q_lock, flags); + seq_putc(m, '\n'); + } + rcu_read_unlock(); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(invalidation_queue); + #ifdef CONFIG_IRQ_REMAP static void ir_tbl_remap_entry_show(struct seq_file *m, struct intel_iommu *iommu) @@ -490,6 +550,8 @@ void __init intel_iommu_debugfs_init(void) debugfs_create_file("domain_translation_struct", 0444, intel_iommu_debug, NULL, &domain_translation_struct_fops); + debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug, + NULL, &invalidation_queue_fops); #ifdef CONFIG_IRQ_REMAP debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug, NULL, &ir_translation_struct_fops); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0182cff2c7ac..648a785e078a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -296,31 +296,6 @@ static inline void context_clear_entry(struct context_entry *context) static struct dmar_domain *si_domain; static int hw_pass_through = 1; -/* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0) - -/* - * This is a DMA domain allocated through the iommu domain allocation - * interface. But one or more devices belonging to this domain have - * been chosen to use a private domain. We should avoid to use the - * map/unmap/iova_to_phys APIs on it. - */ -#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1) - -/* - * When VT-d works in the scalable mode, it allows DMA translation to - * happen through either first level or second level page table. This - * bit marks that the DMA translation for the domain goes through the - * first level page table, otherwise, it goes through the second level. - */ -#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2) - -/* - * Domain represents a virtual machine which demands iommu nested - * translation mode support. - */ -#define DOMAIN_FLAG_NESTING_MODE BIT(3) - #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) @@ -355,11 +330,6 @@ static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); -static void domain_context_clear(struct intel_iommu *iommu, - struct device *dev); -static int domain_detach_iommu(struct dmar_domain *domain, - struct intel_iommu *iommu); -static bool device_is_rmrr_locked(struct device *dev); static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev); static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -395,6 +365,21 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) +struct device_domain_info *get_domain_info(struct device *dev) +{ + struct device_domain_info *info; + + if (!dev) + return NULL; + + info = dev->archdata.iommu; + if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO || + info == DEFER_DEVICE_DOMAIN_INFO)) + return NULL; + + return info; +} + DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); @@ -446,12 +431,6 @@ static void init_translation_status(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED; } -/* Convert generic 'struct iommu_domain to private struct dmar_domain */ -static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct dmar_domain, domain); -} - static int __init intel_iommu_setup(char *str) { if (!str) @@ -480,8 +459,7 @@ static int __init intel_iommu_setup(char *str) pr_info("Intel-IOMMU: scalable mode supported\n"); intel_iommu_sm = 1; } else if (!strncmp(str, "tboot_noforce", 13)) { - printk(KERN_INFO - "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); + pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); intel_iommu_tboot_noforce = 1; } else if (!strncmp(str, "nobounce", 8)) { pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n"); @@ -1454,8 +1432,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) info->pri_enabled = 1; #endif - if (!pdev->untrusted && info->ats_supported && - pci_ats_page_aligned(pdev) && + if (info->ats_supported && pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; domain_update_iotlb(info->domain); @@ -1763,6 +1740,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if (ecap_prs(iommu->ecap)) intel_svm_finish_prq(iommu); } + if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap)) + ioasid_unregister_allocator(&iommu->pasid_allocator); + #endif } @@ -1911,11 +1891,6 @@ static int dmar_init_reserved_ranges(void) return 0; } -static void domain_reserve_special_ranges(struct dmar_domain *domain) -{ - copy_reserved_iova(&reserved_iova_list, &domain->iovad); -} - static inline int guestwidth_to_adjustwidth(int gaw) { int agaw; @@ -1930,65 +1905,6 @@ static inline int guestwidth_to_adjustwidth(int gaw) return agaw; } -static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, - int guest_width) -{ - int adjust_width, agaw; - unsigned long sagaw; - int ret; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - - if (!intel_iommu_strict) { - ret = init_iova_flush_queue(&domain->iovad, - iommu_flush_iova, iova_entry_free); - if (ret) - pr_info("iova flush queue initialization failed\n"); - } - - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - if (guest_width > cap_mgaw(iommu->cap)) - guest_width = cap_mgaw(iommu->cap); - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - agaw = width_to_agaw(adjust_width); - sagaw = cap_sagaw(iommu->cap); - if (!test_bit(agaw, &sagaw)) { - /* hardware doesn't support it, choose a bigger one */ - pr_debug("Hardware doesn't support agaw %d\n", agaw); - agaw = find_next_bit(&sagaw, 5, agaw); - if (agaw >= 5) - return -ENODEV; - } - domain->agaw = agaw; - - if (ecap_coherent(iommu->ecap)) - domain->iommu_coherency = 1; - else - domain->iommu_coherency = 0; - - if (ecap_sc_support(iommu->ecap)) - domain->iommu_snooping = 1; - else - domain->iommu_snooping = 0; - - if (intel_iommu_superpage) - domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); - else - domain->iommu_superpage = 0; - - domain->nid = iommu->node; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return -ENOMEM; - __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); - return 0; -} - static void domain_exit(struct dmar_domain *domain) { @@ -1996,7 +1912,8 @@ static void domain_exit(struct dmar_domain *domain) domain_remove_dev_info(domain); /* destroy iovas */ - put_iova_domain(&domain->iovad); + if (domain->domain.type == IOMMU_DOMAIN_DMA) + put_iova_domain(&domain->iovad); if (domain->pgd) { struct page *freelist; @@ -2518,11 +2435,8 @@ struct dmar_domain *find_domain(struct device *dev) if (unlikely(attach_deferred(dev) || iommu_dummy(dev))) return NULL; - if (dev_is_pci(dev)) - dev = &pci_real_dma_dev(to_pci_dev(dev))->dev; - /* No lock here, assumes no domain exit in normal case */ - info = dev->archdata.iommu; + info = get_domain_info(dev); if (likely(info)) return info->domain; @@ -2545,7 +2459,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) struct device_domain_info *info; list_for_each_entry(info, &device_domain_list, global) - if (info->iommu->segment == segment && info->bus == bus && + if (info->segment == segment && info->bus == bus && info->devfn == devfn) return info; @@ -2582,6 +2496,12 @@ static int domain_setup_first_level(struct intel_iommu *iommu, flags); } +static bool dev_is_real_dma_subdevice(struct device *dev) +{ + return dev && dev_is_pci(dev) && + pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev); +} + static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus, int devfn, struct device *dev, @@ -2596,8 +2516,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (!info) return NULL; - info->bus = bus; - info->devfn = devfn; + if (!dev_is_real_dma_subdevice(dev)) { + info->bus = bus; + info->devfn = devfn; + info->segment = iommu->segment; + } else { + struct pci_dev *pdev = to_pci_dev(dev); + + info->bus = pdev->bus->number; + info->devfn = pdev->devfn; + info->segment = pci_domain_nr(pdev->bus); + } + info->ats_supported = info->pasid_supported = info->pri_supported = 0; info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; info->ats_qdep = 0; @@ -2611,10 +2541,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); - if (!pdev->untrusted && - !pci_ats_disabled() && - ecap_dev_iotlb_support(iommu->ecap) && - pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && + if (ecap_dev_iotlb_support(iommu->ecap) && + pci_ats_supported(pdev) && dmar_find_matched_atsr_unit(pdev)) info->ats_supported = 1; @@ -2637,7 +2565,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (!found) { struct device_domain_info *info2; - info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); + info2 = dmar_search_domain_by_dev_info(info->segment, info->bus, + info->devfn); if (info2) { found = info2->domain; info2->dev = dev; @@ -2704,108 +2633,10 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, return domain; } -static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) -{ - *(u16 *)opaque = alias; - return 0; -} - -static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) -{ - struct device_domain_info *info; - struct dmar_domain *domain = NULL; - struct intel_iommu *iommu; - u16 dma_alias; - unsigned long flags; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return NULL; - - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); - - spin_lock_irqsave(&device_domain_lock, flags); - info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus), - PCI_BUS_NUM(dma_alias), - dma_alias & 0xff); - if (info) { - iommu = info->iommu; - domain = info->domain; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - - /* DMA alias already has a domain, use it */ - if (info) - goto out; - } - - /* Allocate and initialize new domain for the device */ - domain = alloc_domain(0); - if (!domain) - return NULL; - if (domain_init(domain, iommu, gaw)) { - domain_exit(domain); - return NULL; - } - -out: - return domain; -} - -static struct dmar_domain *set_domain_for_dev(struct device *dev, - struct dmar_domain *domain) -{ - struct intel_iommu *iommu; - struct dmar_domain *tmp; - u16 req_id, dma_alias; - u8 bus, devfn; - - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - return NULL; - - req_id = ((u16)bus << 8) | devfn; - - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); - - /* register PCI DMA alias device */ - if (req_id != dma_alias) { - tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias), - dma_alias & 0xff, NULL, domain); - - if (!tmp || tmp != domain) - return tmp; - } - } - - tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain); - if (!tmp || tmp != domain) - return tmp; - - return domain; -} - static int iommu_domain_identity_map(struct dmar_domain *domain, - unsigned long long start, - unsigned long long end) + unsigned long first_vpfn, + unsigned long last_vpfn) { - unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; - unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; - - if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), - dma_to_mm_pfn(last_vpfn))) { - pr_err("Reserving iova failed\n"); - return -ENOMEM; - } - - pr_debug("Mapping reserved region %llx-%llx\n", start, end); /* * RMRR range might have overlap with physical memory range, * clear it first @@ -2817,45 +2648,6 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int domain_prepare_identity_map(struct device *dev, - struct dmar_domain *domain, - unsigned long long start, - unsigned long long end) -{ - /* For _hardware_ passthrough, don't bother. But for software - passthrough, we do it anyway -- it may indicate a memory - range which is reserved in E820, so which didn't get set - up to start with in si_domain */ - if (domain == si_domain && hw_pass_through) { - dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n", - start, end); - return 0; - } - - dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end); - - if (end < start) { - WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -EIO; - } - - if (end >> agaw_to_width(domain->agaw)) { - WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" - "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - agaw_to_width(domain->agaw), - dmi_get_system_info(DMI_BIOS_VENDOR), - dmi_get_system_info(DMI_BIOS_VERSION), - dmi_get_system_info(DMI_PRODUCT_VERSION)); - return -EIO; - } - - return iommu_domain_identity_map(domain, start, end); -} - static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_init(int hw) @@ -2882,7 +2674,8 @@ static int __init si_domain_init(int hw) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { ret = iommu_domain_identity_map(si_domain, - PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); + mm_to_dma_pfn(start_pfn), + mm_to_dma_pfn(end_pfn)); if (ret) return ret; } @@ -2911,17 +2704,6 @@ static int __init si_domain_init(int hw) return 0; } -static int identity_mapping(struct device *dev) -{ - struct device_domain_info *info; - - info = dev->archdata.iommu; - if (info) - return (info->domain == si_domain); - - return 0; -} - static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev) { struct dmar_domain *ndomain; @@ -3048,31 +2830,6 @@ static int device_def_domain_type(struct device *dev) if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) return IOMMU_DOMAIN_IDENTITY; - - /* - * We want to start off with all devices in the 1:1 domain, and - * take them out later if we find they can't access all of memory. - * - * However, we can't do this for PCI devices behind bridges, - * because all PCI devices behind the same bridge will end up - * with the same source-id on their transactions. - * - * Practically speaking, we can't change things around for these - * devices at run-time, because we can't be sure there'll be no - * DMA transactions in flight for any of their siblings. - * - * So PCI devices (unless they're on the root bus) as well as - * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of - * the 1:1 domain, just in _case_ one of their siblings turns out - * not to be able to map all of memory. - */ - if (!pci_is_pcie(pdev)) { - if (!pci_is_root_bus(pdev->bus)) - return IOMMU_DOMAIN_DMA; - if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) - return IOMMU_DOMAIN_DMA; - } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) - return IOMMU_DOMAIN_DMA; } return 0; @@ -3297,6 +3054,85 @@ out_unmap: return ret; } +#ifdef CONFIG_INTEL_IOMMU_SVM +static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data) +{ + struct intel_iommu *iommu = data; + ioasid_t ioasid; + + if (!iommu) + return INVALID_IOASID; + /* + * VT-d virtual command interface always uses the full 20 bit + * PASID range. Host can partition guest PASID range based on + * policies but it is out of guest's control. + */ + if (min < PASID_MIN || max > intel_pasid_max_id) + return INVALID_IOASID; + + if (vcmd_alloc_pasid(iommu, &ioasid)) + return INVALID_IOASID; + + return ioasid; +} + +static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data) +{ + struct intel_iommu *iommu = data; + + if (!iommu) + return; + /* + * Sanity check the ioasid owner is done at upper layer, e.g. VFIO + * We can only free the PASID when all the devices are unbound. + */ + if (ioasid_find(NULL, ioasid, NULL)) { + pr_alert("Cannot free active IOASID %d\n", ioasid); + return; + } + vcmd_free_pasid(iommu, ioasid); +} + +static void register_pasid_allocator(struct intel_iommu *iommu) +{ + /* + * If we are running in the host, no need for custom allocator + * in that PASIDs are allocated from the host system-wide. + */ + if (!cap_caching_mode(iommu->cap)) + return; + + if (!sm_supported(iommu)) { + pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n"); + return; + } + + /* + * Register a custom PASID allocator if we are running in a guest, + * guest PASID must be obtained via virtual command interface. + * There can be multiple vIOMMUs in each guest but only one allocator + * is active. All vIOMMU allocators will eventually be calling the same + * host allocator. + */ + if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap)) + return; + + pr_info("Register custom PASID allocator\n"); + iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc; + iommu->pasid_allocator.free = intel_vcmd_ioasid_free; + iommu->pasid_allocator.pdata = (void *)iommu; + if (ioasid_register_allocator(&iommu->pasid_allocator)) { + pr_warn("Custom PASID allocator failed, scalable mode disabled\n"); + /* + * Disable scalable mode on this IOMMU if there + * is no custom allocator. Mixing SM capable vIOMMU + * and non-SM vIOMMU are not supported. + */ + intel_iommu_sm = 0; + } +} +#endif + static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; @@ -3414,6 +3250,9 @@ static int __init init_dmars(void) */ for_each_active_iommu(iommu, drhd) { iommu_flush_write_buffer(iommu); +#ifdef CONFIG_INTEL_IOMMU_SVM + register_pasid_allocator(iommu); +#endif iommu_set_root_entry(iommu); iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); @@ -3531,100 +3370,6 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -static struct dmar_domain *get_private_domain_for_dev(struct device *dev) -{ - struct dmar_domain *domain, *tmp; - struct dmar_rmrr_unit *rmrr; - struct device *i_dev; - int i, ret; - - /* Device shouldn't be attached by any domains. */ - domain = find_domain(dev); - if (domain) - return NULL; - - domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - goto out; - - /* We have a new domain - setup possible RMRRs for the device */ - rcu_read_lock(); - for_each_rmrr_units(rmrr) { - for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, i_dev) { - if (i_dev != dev) - continue; - - ret = domain_prepare_identity_map(dev, domain, - rmrr->base_address, - rmrr->end_address); - if (ret) - dev_err(dev, "Mapping reserved region failed\n"); - } - } - rcu_read_unlock(); - - tmp = set_domain_for_dev(dev, domain); - if (!tmp || domain != tmp) { - domain_exit(domain); - domain = tmp; - } - -out: - if (!domain) - dev_err(dev, "Allocating domain failed\n"); - else - domain->domain.type = IOMMU_DOMAIN_DMA; - - return domain; -} - -/* Check if the dev needs to go through non-identity map and unmap process.*/ -static bool iommu_need_mapping(struct device *dev) -{ - int ret; - - if (iommu_dummy(dev)) - return false; - - if (unlikely(attach_deferred(dev))) - do_deferred_attach(dev); - - ret = identity_mapping(dev); - if (ret) { - u64 dma_mask = *dev->dma_mask; - - if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask) - dma_mask = dev->coherent_dma_mask; - - if (dma_mask >= dma_direct_get_required_mask(dev)) - return false; - - /* - * 32 bit DMA is removed from si_domain and fall back to - * non-identity mapping. - */ - dmar_remove_one_dev_info(dev); - ret = iommu_request_dma_domain_for_dev(dev); - if (ret) { - struct iommu_domain *domain; - struct dmar_domain *dmar_domain; - - domain = iommu_get_domain_for_dev(dev); - if (domain) { - dmar_domain = to_dmar_domain(domain); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - } - dmar_remove_one_dev_info(dev); - get_private_domain_for_dev(dev); - } - - dev_info(dev, "32bit DMA uses non-identity mapping\n"); - } - - return true; -} - static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir, u64 dma_mask) { @@ -3638,6 +3383,9 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); + domain = find_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3689,20 +3437,15 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - return __intel_map_single(dev, page_to_phys(page) + offset, - size, dir, *dev->dma_mask); - return dma_direct_map_page(dev, page, offset, size, dir, attrs); + return __intel_map_single(dev, page_to_phys(page) + offset, + size, dir, *dev->dma_mask); } static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - return __intel_map_single(dev, phys_addr, size, dir, - *dev->dma_mask); - return dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3753,17 +3496,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - intel_unmap(dev, dev_addr, size); - else - dma_direct_unmap_page(dev, dev_addr, size, dir, attrs); + intel_unmap(dev, dev_addr, size); } static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (iommu_need_mapping(dev)) - intel_unmap(dev, dev_addr, size); + intel_unmap(dev, dev_addr, size); } static void *intel_alloc_coherent(struct device *dev, size_t size, @@ -3773,8 +3512,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, struct page *page = NULL; int order; - if (!iommu_need_mapping(dev)) - return dma_direct_alloc(dev, size, dma_handle, flags, attrs); + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); size = PAGE_ALIGN(size); order = get_order(size); @@ -3809,9 +3548,6 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, int order; struct page *page = virt_to_page(vaddr); - if (!iommu_need_mapping(dev)) - return dma_direct_free(dev, size, vaddr, dma_handle, attrs); - size = PAGE_ALIGN(size); order = get_order(size); @@ -3829,9 +3565,6 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *sg; int i; - if (!iommu_need_mapping(dev)) - return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs); - for_each_sg(sglist, sg, nelems, i) { nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg)); } @@ -3855,8 +3588,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (!iommu_need_mapping(dev)) - return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); + + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); domain = find_domain(dev); if (!domain) @@ -3903,8 +3637,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele static u64 intel_get_required_mask(struct device *dev) { - if (!iommu_need_mapping(dev)) - return dma_direct_get_required_mask(dev); return DMA_BIT_MASK(32); } @@ -4813,58 +4545,37 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { struct memory_notify *mhp = v; - unsigned long long start, end; - unsigned long start_vpfn, last_vpfn; + unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); + unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + + mhp->nr_pages - 1); switch (val) { case MEM_GOING_ONLINE: - start = mhp->start_pfn << PAGE_SHIFT; - end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; - if (iommu_domain_identity_map(si_domain, start, end)) { - pr_warn("Failed to build identity map for [%llx-%llx]\n", - start, end); + if (iommu_domain_identity_map(si_domain, + start_vpfn, last_vpfn)) { + pr_warn("Failed to build identity map for [%lx-%lx]\n", + start_vpfn, last_vpfn); return NOTIFY_BAD; } break; case MEM_OFFLINE: case MEM_CANCEL_ONLINE: - start_vpfn = mm_to_dma_pfn(mhp->start_pfn); - last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); - while (start_vpfn <= last_vpfn) { - struct iova *iova; + { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; struct page *freelist; - iova = find_iova(&si_domain->iovad, start_vpfn); - if (iova == NULL) { - pr_debug("Failed get IOVA for PFN %lx\n", - start_vpfn); - break; - } - - iova = split_and_remove_iova(&si_domain->iovad, iova, - start_vpfn, last_vpfn); - if (iova == NULL) { - pr_warn("Failed to split IOVA PFN [%lx-%lx]\n", - start_vpfn, last_vpfn); - return NOTIFY_BAD; - } - - freelist = domain_unmap(si_domain, iova->pfn_lo, - iova->pfn_hi); + freelist = domain_unmap(si_domain, + start_vpfn, last_vpfn); rcu_read_lock(); for_each_active_iommu(iommu, drhd) iommu_flush_iotlb_psi(iommu, si_domain, - iova->pfn_lo, iova_size(iova), + start_vpfn, mhp->nr_pages, !freelist, 0); rcu_read_unlock(); dma_free_pagelist(freelist); - - start_vpfn = iova->pfn_hi + 1; - free_iova_mem(iova); } break; } @@ -4892,8 +4603,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) for (did = 0; did < cap_ndoms(iommu->cap); did++) { domain = get_iommu_domain(iommu, (u16)did); - if (!domain) + if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA) continue; + free_cpu_cached_iovas(cpu, &domain->iovad); } } @@ -5186,18 +4898,6 @@ int __init intel_iommu_init(void) } up_write(&dmar_global_lock); -#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB) - /* - * If the system has no untrusted device or the user has decided - * to disable the bounce page mechanisms, we don't need swiotlb. - * Mark this and the pre-allocated bounce pages will be released - * later. - */ - if (!has_untrusted_dev() || intel_no_bounce) - swiotlb = 0; -#endif - dma_ops = &intel_dma_ops; - init_iommu_pm_ops(); down_read(&dmar_global_lock); @@ -5283,10 +4983,11 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) if (info->dev) { if (dev_is_pci(info->dev) && sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, info->dev, - PASID_RID2PASID); + PASID_RID2PASID, false); iommu_disable_dev_iotlb(info); - domain_context_clear(iommu, info->dev); + if (!dev_is_real_dma_subdevice(info->dev)) + domain_context_clear(iommu, info->dev); intel_pasid_free_table(info->dev); } @@ -5296,12 +4997,6 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) domain_detach_iommu(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); - /* free the private domain */ - if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && - list_empty(&domain->devices)) - domain_exit(info->domain); - free_devinfo_mem(info); } @@ -5311,9 +5006,8 @@ static void dmar_remove_one_dev_info(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; - if (info && info != DEFER_DEVICE_DOMAIN_INFO - && info != DUMMY_DEVICE_DOMAIN_INFO) + info = get_domain_info(dev); + if (info) __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); } @@ -5322,9 +5016,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - domain_reserve_special_ranges(domain); - /* calculate AGAW */ domain->gaw = guest_width; adjust_width = guestwidth_to_adjustwidth(guest_width); @@ -5343,11 +5034,21 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) return 0; } +static void intel_init_iova_domain(struct dmar_domain *dmar_domain) +{ + init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad); + + if (!intel_iommu_strict && + init_iova_flush_queue(&dmar_domain->iovad, + iommu_flush_iova, iova_entry_free)) + pr_info("iova flush queue initialization failed\n"); +} + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; struct iommu_domain *domain; - int ret; switch (type) { case IOMMU_DOMAIN_DMA: @@ -5364,13 +5065,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return NULL; } - if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) { - ret = init_iova_flush_queue(&dmar_domain->iovad, - iommu_flush_iova, - iova_entry_free); - if (ret) - pr_info("iova flush queue initialization failed\n"); - } + if (type == IOMMU_DOMAIN_DMA) + intel_init_iova_domain(dmar_domain); domain_update_iommu_cap(dmar_domain); @@ -5403,7 +5099,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain) static inline bool is_aux_domain(struct device *dev, struct iommu_domain *domain) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); return info && info->auxd_enabled && domain->type == IOMMU_DOMAIN_UNMANAGED; @@ -5412,7 +5108,7 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) static void auxiliary_link_device(struct dmar_domain *domain, struct device *dev) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) @@ -5425,7 +5121,7 @@ static void auxiliary_link_device(struct dmar_domain *domain, static void auxiliary_unlink_device(struct dmar_domain *domain, struct device *dev) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) @@ -5513,13 +5209,13 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, return; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); iommu = info->iommu; auxiliary_unlink_device(domain, dev); spin_lock(&iommu->lock); - intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid); + intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false); domain_detach_iommu(domain, iommu); spin_unlock(&iommu->lock); @@ -5626,6 +5322,176 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain, aux_domain_remove_dev(to_dmar_domain(domain), dev); } +/* + * 2D array for converting and sanitizing IOMMU generic TLB granularity to + * VT-d granularity. Invalidation is typically included in the unmap operation + * as a result of DMA or VFIO unmap. However, for assigned devices guest + * owns the first level page tables. Invalidations of translation caches in the + * guest are trapped and passed down to the host. + * + * vIOMMU in the guest will only expose first level page tables, therefore + * we do not support IOTLB granularity for request without PASID (second level). + * + * For example, to find the VT-d granularity encoding for IOTLB + * type and page selective granularity within PASID: + * X: indexed by iommu cache type + * Y: indexed by enum iommu_inv_granularity + * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR] + */ + +static const int +inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = { + /* + * PASID based IOTLB invalidation: PASID selective (per PASID), + * page selective (address granularity) + */ + {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID}, + /* PASID based dev TLBs */ + {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL}, + /* PASID cache */ + {-EINVAL, -EINVAL, -EINVAL} +}; + +static inline int to_vtd_granularity(int type, int granu) +{ + return inv_type_granu_table[type][granu]; +} + +static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules) +{ + u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT; + + /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc. + * IOMMU cache invalidate API passes granu_size in bytes, and number of + * granu size in contiguous memory. + */ + return order_base_2(nr_pages); +} + +#ifdef CONFIG_INTEL_IOMMU_SVM +static int +intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, + struct iommu_cache_invalidate_info *inv_info) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + int cache_type; + u8 bus, devfn; + u16 did, sid; + int ret = 0; + u64 size = 0; + + if (!inv_info || !dmar_domain || + inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1) + return -EINVAL; + + if (!dev || !dev_is_pci(dev)) + return -ENODEV; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE)) + return -EINVAL; + + spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&iommu->lock); + info = get_domain_info(dev); + if (!info) { + ret = -EINVAL; + goto out_unlock; + } + did = dmar_domain->iommu_did[iommu->seq_id]; + sid = PCI_DEVID(bus, devfn); + + /* Size is only valid in address selective invalidation */ + if (inv_info->granularity != IOMMU_INV_GRANU_PASID) + size = to_vtd_size(inv_info->addr_info.granule_size, + inv_info->addr_info.nb_granules); + + for_each_set_bit(cache_type, + (unsigned long *)&inv_info->cache, + IOMMU_CACHE_INV_TYPE_NR) { + int granu = 0; + u64 pasid = 0; + + granu = to_vtd_granularity(cache_type, inv_info->granularity); + if (granu == -EINVAL) { + pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n", + cache_type, inv_info->granularity); + break; + } + + /* + * PASID is stored in different locations based on the + * granularity. + */ + if (inv_info->granularity == IOMMU_INV_GRANU_PASID && + (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID)) + pasid = inv_info->pasid_info.pasid; + else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && + (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID)) + pasid = inv_info->addr_info.pasid; + + switch (BIT(cache_type)) { + case IOMMU_CACHE_INV_TYPE_IOTLB: + if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && + size && + (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) { + pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n", + inv_info->addr_info.addr, size); + ret = -ERANGE; + goto out_unlock; + } + + /* + * If granu is PASID-selective, address is ignored. + * We use npages = -1 to indicate that. + */ + qi_flush_piotlb(iommu, did, pasid, + mm_to_dma_pfn(inv_info->addr_info.addr), + (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size, + inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF); + + /* + * Always flush device IOTLB if ATS is enabled. vIOMMU + * in the guest may assume IOTLB flush is inclusive, + * which is more efficient. + */ + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(iommu, sid, + info->pfsid, pasid, + info->ats_qdep, + inv_info->addr_info.addr, + size, granu); + break; + case IOMMU_CACHE_INV_TYPE_DEV_IOTLB: + if (info->ats_enabled) + qi_flush_dev_iotlb_pasid(iommu, sid, + info->pfsid, pasid, + info->ats_qdep, + inv_info->addr_info.addr, + size, granu); + else + pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n"); + break; + default: + dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n", + cache_type); + ret = -EINVAL; + } + } +out_unlock: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} +#endif + static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, size_t size, int iommu_prot, gfp_t gfp) @@ -5781,78 +5647,22 @@ static bool intel_iommu_capable(enum iommu_cap cap) return false; } -static int intel_iommu_add_device(struct device *dev) +static struct iommu_device *intel_iommu_probe_device(struct device *dev) { - struct dmar_domain *dmar_domain; - struct iommu_domain *domain; struct intel_iommu *iommu; - struct iommu_group *group; u8 bus, devfn; - int ret; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) - return -ENODEV; - - iommu_device_link(&iommu->iommu, dev); + return ERR_PTR(-ENODEV); if (translation_pre_enabled(iommu)) dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO; - group = iommu_group_get_for_dev(dev); - - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto unlink; - } - - iommu_group_put(group); - - domain = iommu_get_domain_for_dev(dev); - dmar_domain = to_dmar_domain(domain); - if (domain->type == IOMMU_DOMAIN_DMA) { - if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { - ret = iommu_request_dm_for_dev(dev); - if (ret) { - dmar_remove_one_dev_info(dev); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - domain_add_dev_info(si_domain, dev); - dev_info(dev, - "Device uses a private identity domain.\n"); - } - } - } else { - if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) { - ret = iommu_request_dma_domain_for_dev(dev); - if (ret) { - dmar_remove_one_dev_info(dev); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - if (!get_private_domain_for_dev(dev)) { - dev_warn(dev, - "Failed to get a private domain.\n"); - ret = -ENOMEM; - goto unlink; - } - - dev_info(dev, - "Device uses a private dma domain.\n"); - } - } - } - - if (device_needs_bounce(dev)) { - dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n"); - set_dma_ops(dev, &bounce_dma_ops); - } - - return 0; - -unlink: - iommu_device_unlink(&iommu->iommu, dev); - return ret; + return &iommu->iommu; } -static void intel_iommu_remove_device(struct device *dev) +static void intel_iommu_release_device(struct device *dev) { struct intel_iommu *iommu; u8 bus, devfn; @@ -5863,11 +5673,19 @@ static void intel_iommu_remove_device(struct device *dev) dmar_remove_one_dev_info(dev); - iommu_group_remove_device(dev); + set_dma_ops(dev, NULL); +} - iommu_device_unlink(&iommu->iommu, dev); +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct iommu_domain *domain; + domain = iommu_get_domain_for_dev(dev); if (device_needs_bounce(dev)) + set_dma_ops(dev, &bounce_dma_ops); + else if (domain && domain->type == IOMMU_DOMAIN_DMA) + set_dma_ops(dev, &intel_dma_ops); + else set_dma_ops(dev, NULL); } @@ -5945,7 +5763,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev) spin_lock(&iommu->lock); ret = -EINVAL; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->pasid_supported) goto out; @@ -6041,7 +5859,7 @@ static int intel_iommu_enable_auxd(struct device *dev) return -ENODEV; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); info->auxd_enabled = 1; spin_unlock_irqrestore(&device_domain_lock, flags); @@ -6054,7 +5872,7 @@ static int intel_iommu_disable_auxd(struct device *dev) unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!WARN_ON(!info)) info->auxd_enabled = 0; spin_unlock_irqrestore(&device_domain_lock, flags); @@ -6107,6 +5925,14 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) return !!siov_find_pci_dvsec(to_pci_dev(dev)); } + if (feat == IOMMU_DEV_FEAT_SVA) { + struct device_domain_info *info = get_domain_info(dev); + + return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) && + info->pasid_supported && info->pri_supported && + info->ats_supported; + } + return false; } @@ -6116,6 +5942,16 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (feat == IOMMU_DEV_FEAT_AUX) return intel_iommu_enable_auxd(dev); + if (feat == IOMMU_DEV_FEAT_SVA) { + struct device_domain_info *info = get_domain_info(dev); + + if (!info) + return -EINVAL; + + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) + return 0; + } + return -ENODEV; } @@ -6131,7 +5967,7 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) static bool intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat) { - struct device_domain_info *info = dev->archdata.iommu; + struct device_domain_info *info = get_domain_info(dev); if (feat == IOMMU_DEV_FEAT_AUX) return scalable_mode_support() && info && info->auxd_enabled; @@ -6198,8 +6034,9 @@ const struct iommu_ops intel_iommu_ops = { .map = intel_iommu_map, .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, - .add_device = intel_iommu_add_device, - .remove_device = intel_iommu_remove_device, + .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, + .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .apply_resv_region = intel_iommu_apply_resv_region, @@ -6209,7 +6046,16 @@ const struct iommu_ops intel_iommu_ops = { .dev_enable_feat = intel_iommu_dev_enable_feat, .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, + .def_domain_type = device_def_domain_type, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, +#ifdef CONFIG_INTEL_IOMMU_SVM + .cache_invalidate = intel_iommu_sva_invalidate, + .sva_bind_gpasid = intel_svm_bind_gpasid, + .sva_unbind_gpasid = intel_svm_unbind_gpasid, + .sva_bind = intel_svm_bind, + .sva_unbind = intel_svm_unbind, + .sva_get_pasid = intel_svm_get_pasid, +#endif }; static void quirk_iommu_igfx(struct pci_dev *dev) diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 22b30f10b396..c81f0f17c6ba 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -27,6 +27,63 @@ static DEFINE_SPINLOCK(pasid_lock); u32 intel_pasid_max_id = PASID_MAX; +int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid) +{ + unsigned long flags; + u8 status_code; + int ret = 0; + u64 res; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC); + IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq, + !(res & VCMD_VRSP_IP), res); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); + + status_code = VCMD_VRSP_SC(res); + switch (status_code) { + case VCMD_VRSP_SC_SUCCESS: + *pasid = VCMD_VRSP_RESULT_PASID(res); + break; + case VCMD_VRSP_SC_NO_PASID_AVAIL: + pr_info("IOMMU: %s: No PASID available\n", iommu->name); + ret = -ENOSPC; + break; + default: + ret = -ENODEV; + pr_warn("IOMMU: %s: Unexpected error code %d\n", + iommu->name, status_code); + } + + return ret; +} + +void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid) +{ + unsigned long flags; + u8 status_code; + u64 res; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + dmar_writeq(iommu->reg + DMAR_VCMD_REG, + VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE); + IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq, + !(res & VCMD_VRSP_IP), res); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); + + status_code = VCMD_VRSP_SC(res); + switch (status_code) { + case VCMD_VRSP_SC_SUCCESS: + break; + case VCMD_VRSP_SC_INVALID_PASID: + pr_info("IOMMU: %s: Invalid PASID\n", iommu->name); + break; + default: + pr_warn("IOMMU: %s: Unexpected error code %d\n", + iommu->name, status_code); + } +} + /* * Per device pasid table management: */ @@ -94,7 +151,7 @@ int intel_pasid_alloc_table(struct device *dev) int size; might_sleep(); - info = dev->archdata.iommu; + info = get_domain_info(dev); if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; @@ -141,7 +198,7 @@ void intel_pasid_free_table(struct device *dev) struct pasid_entry *table; int i, max_pde; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !dev_is_pci(dev) || !info->pasid_table) return; @@ -167,7 +224,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev) { struct device_domain_info *info; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info) return NULL; @@ -178,7 +235,7 @@ int intel_pasid_get_dev_max_id(struct device *dev) { struct device_domain_info *info; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->pasid_table) return 0; @@ -199,7 +256,7 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) return NULL; dir = pasid_table->table; - info = dev->archdata.iommu; + info = get_domain_info(dev); dir_index = pasid >> PASID_PDE_SHIFT; index = pasid & PASID_PTE_MASK; @@ -235,7 +292,20 @@ static inline void pasid_clear_entry(struct pasid_entry *pe) WRITE_ONCE(pe->val[7], 0); } -static void intel_pasid_clear_entry(struct device *dev, int pasid) +static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe) +{ + WRITE_ONCE(pe->val[0], PASID_PTE_FPD); + WRITE_ONCE(pe->val[1], 0); + WRITE_ONCE(pe->val[2], 0); + WRITE_ONCE(pe->val[3], 0); + WRITE_ONCE(pe->val[4], 0); + WRITE_ONCE(pe->val[5], 0); + WRITE_ONCE(pe->val[6], 0); + WRITE_ONCE(pe->val[7], 0); +} + +static void +intel_pasid_clear_entry(struct device *dev, int pasid, bool fault_ignore) { struct pasid_entry *pe; @@ -243,7 +313,10 @@ static void intel_pasid_clear_entry(struct device *dev, int pasid) if (WARN_ON(!pe)) return; - pasid_clear_entry(pe); + if (fault_ignore && pasid_pte_is_present(pe)) + pasid_clear_entry_with_fpd(pe); + else + pasid_clear_entry(pe); } static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits) @@ -359,18 +432,29 @@ pasid_set_flpm(struct pasid_entry *pe, u64 value) pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2); } +/* + * Setup the Extended Access Flag Enable (EAFE) field (Bit 135) + * of a scalable mode PASID entry. + */ +static inline void +pasid_set_eafe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7); +} + static void pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, int pasid) { struct qi_desc desc; - desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); + desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) | + QI_PC_PASID(pasid) | QI_PC_TYPE; desc.qw1 = 0; desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } static void @@ -384,7 +468,7 @@ iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid) desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, iommu); + qi_submit_sync(iommu, &desc, 1, 0); } static void @@ -394,7 +478,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, struct device_domain_info *info; u16 sid, qdep, pfsid; - info = dev->archdata.iommu; + info = get_domain_info(dev); if (!info || !info->ats_enabled) return; @@ -405,8 +489,8 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); } -void intel_pasid_tear_down_entry(struct intel_iommu *iommu, - struct device *dev, int pasid) +void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, + int pasid, bool fault_ignore) { struct pasid_entry *pte; u16 did; @@ -416,7 +500,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, return; did = pasid_get_domain_id(pte); - intel_pasid_clear_entry(dev, pasid); + intel_pasid_clear_entry(dev, pasid, fault_ignore); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); @@ -492,7 +576,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); /* Setup Present and PASID Granular Transfer Type: */ - pasid_set_translation_type(pte, 1); + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY); pasid_set_present(pte); pasid_flush_caches(iommu, pte, pasid, did); @@ -500,6 +584,25 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, } /* + * Skip top levels of page tables for iommu which has less agaw + * than default. Unnecessary for PT mode. + */ +static inline int iommu_skip_agaw(struct dmar_domain *domain, + struct intel_iommu *iommu, + struct dma_pte **pgd) +{ + int agaw; + + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + *pgd = phys_to_virt(dma_pte_addr(*pgd)); + if (!dma_pte_present(*pgd)) + return -EINVAL; + } + + return agaw; +} + +/* * Set up the scalable mode pasid entry for second only translation type. */ int intel_pasid_setup_second_level(struct intel_iommu *iommu, @@ -522,17 +625,11 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, return -EINVAL; } - /* - * Skip top levels of page tables for iommu which has less agaw - * than default. Unnecessary for PT mode. - */ pgd = domain->pgd; - for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { - pgd = phys_to_virt(dma_pte_addr(pgd)); - if (!dma_pte_present(pgd)) { - dev_err(dev, "Invalid domain page table\n"); - return -EINVAL; - } + agaw = iommu_skip_agaw(domain, iommu, &pgd); + if (agaw < 0) { + dev_err(dev, "Invalid domain page table\n"); + return -EINVAL; } pgd_val = virt_to_phys(pgd); @@ -548,7 +645,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, pasid_set_domain_id(pte, did); pasid_set_slptr(pte, pgd_val); pasid_set_address_width(pte, agaw); - pasid_set_translation_type(pte, 2); + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); @@ -582,7 +679,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, pasid_clear_entry(pte); pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); - pasid_set_translation_type(pte, 4); + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT); pasid_set_fault_enable(pte); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); @@ -596,3 +693,161 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, return 0; } + +static int +intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte, + struct iommu_gpasid_bind_data_vtd *pasid_data) +{ + /* + * Not all guest PASID table entry fields are passed down during bind, + * here we only set up the ones that are dependent on guest settings. + * Execution related bits such as NXE, SMEP are not supported. + * Other fields, such as snoop related, are set based on host needs + * regardless of guest settings. + */ + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) { + if (!ecap_srs(iommu->ecap)) { + pr_err_ratelimited("No supervisor request support on %s\n", + iommu->name); + return -EINVAL; + } + pasid_set_sre(pte); + } + + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) { + if (!ecap_eafs(iommu->ecap)) { + pr_err_ratelimited("No extended access flag support on %s\n", + iommu->name); + return -EINVAL; + } + pasid_set_eafe(pte); + } + + /* + * Memory type is only applicable to devices inside processor coherent + * domain. Will add MTS support once coherent devices are available. + */ + if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) { + pr_warn_ratelimited("No memory type support %s\n", + iommu->name); + return -EINVAL; + } + + return 0; +} + +/** + * intel_pasid_setup_nested() - Set up PASID entry for nested translation. + * This could be used for guest shared virtual address. In this case, the + * first level page tables are used for GVA-GPA translation in the guest, + * second level page tables are used for GPA-HPA translation. + * + * @iommu: IOMMU which the device belong to + * @dev: Device to be set up for translation + * @gpgd: FLPTPTR: First Level Page translation pointer in GPA + * @pasid: PASID to be programmed in the device PASID table + * @pasid_data: Additional PASID info from the guest bind request + * @domain: Domain info for setting up second level page tables + * @addr_width: Address width of the first level (guest) + */ +int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, + pgd_t *gpgd, int pasid, + struct iommu_gpasid_bind_data_vtd *pasid_data, + struct dmar_domain *domain, int addr_width) +{ + struct pasid_entry *pte; + struct dma_pte *pgd; + int ret = 0; + u64 pgd_val; + int agaw; + u16 did; + + if (!ecap_nest(iommu->ecap)) { + pr_err_ratelimited("IOMMU: %s: No nested translation support\n", + iommu->name); + return -EINVAL; + } + + if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) { + pr_err_ratelimited("Domain is not in nesting mode, %x\n", + domain->flags); + return -EINVAL; + } + + pte = intel_pasid_get_entry(dev, pasid); + if (WARN_ON(!pte)) + return -EINVAL; + + /* + * Caller must ensure PASID entry is not in use, i.e. not bind the + * same PASID to the same device twice. + */ + if (pasid_pte_is_present(pte)) + return -EBUSY; + + pasid_clear_entry(pte); + + /* Sanity checking performed by caller to make sure address + * width matching in two dimensions: + * 1. CPU vs. IOMMU + * 2. Guest vs. Host. + */ + switch (addr_width) { +#ifdef CONFIG_X86 + case ADDR_WIDTH_5LEVEL: + if (!cpu_feature_enabled(X86_FEATURE_LA57) || + !cap_5lp_support(iommu->cap)) { + dev_err_ratelimited(dev, + "5-level paging not supported\n"); + return -EINVAL; + } + + pasid_set_flpm(pte, 1); + break; +#endif + case ADDR_WIDTH_4LEVEL: + pasid_set_flpm(pte, 0); + break; + default: + dev_err_ratelimited(dev, "Invalid guest address width %d\n", + addr_width); + return -EINVAL; + } + + /* First level PGD is in GPA, must be supported by the second level */ + if ((uintptr_t)gpgd > domain->max_addr) { + dev_err_ratelimited(dev, + "Guest PGD %lx not supported, max %llx\n", + (uintptr_t)gpgd, domain->max_addr); + return -EINVAL; + } + pasid_set_flptr(pte, (uintptr_t)gpgd); + + ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data); + if (ret) + return ret; + + /* Setup the second level based on the given domain */ + pgd = domain->pgd; + + agaw = iommu_skip_agaw(domain, iommu, &pgd); + if (agaw < 0) { + dev_err_ratelimited(dev, "Invalid domain page table\n"); + return -EINVAL; + } + pgd_val = virt_to_phys(pgd); + pasid_set_slptr(pte, pgd_val); + pasid_set_fault_enable(pte); + + did = domain->iommu_did[iommu->seq_id]; + pasid_set_domain_id(pte, did); + + pasid_set_address_width(pte, agaw); + pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + + pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED); + pasid_set_present(pte); + pasid_flush_caches(iommu, pte, pasid, did); + + return ret; +} diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 92de6df24ccb..c5318d40e0fa 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -15,6 +15,7 @@ #define PASID_MAX 0x100000 #define PASID_PTE_MASK 0x3F #define PASID_PTE_PRESENT 1 +#define PASID_PTE_FPD 2 #define PDE_PFN_MASK PAGE_MASK #define PASID_PDE_SHIFT 6 #define MAX_NR_PASID_BITS 20 @@ -23,6 +24,16 @@ #define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1) #define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7)) +/* Virtual command interface for enlightened pasid management. */ +#define VCMD_CMD_ALLOC 0x1 +#define VCMD_CMD_FREE 0x2 +#define VCMD_VRSP_IP 0x1 +#define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3) +#define VCMD_VRSP_SC_SUCCESS 0 +#define VCMD_VRSP_SC_NO_PASID_AVAIL 1 +#define VCMD_VRSP_SC_INVALID_PASID 1 +#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff) +#define VCMD_CMD_OPERAND(e) ((e) << 8) /* * Domain ID reserved for pasid entries programmed for first-level * only and pass-through transfer modes. @@ -36,6 +47,7 @@ * to vmalloc or even module mappings. */ #define PASID_FLAG_SUPERVISOR_MODE BIT(0) +#define PASID_FLAG_NESTED BIT(1) /* * The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first- @@ -51,6 +63,11 @@ struct pasid_entry { u64 val[8]; }; +#define PASID_ENTRY_PGTT_FL_ONLY (1) +#define PASID_ENTRY_PGTT_SL_ONLY (2) +#define PASID_ENTRY_PGTT_NESTED (3) +#define PASID_ENTRY_PGTT_PT (4) + /* The representative of a PASID table */ struct pasid_table { void *table; /* pasid table pointer */ @@ -99,7 +116,13 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, int intel_pasid_setup_pass_through(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, int pasid); +int intel_pasid_setup_nested(struct intel_iommu *iommu, + struct device *dev, pgd_t *pgd, int pasid, + struct iommu_gpasid_bind_data_vtd *pasid_data, + struct dmar_domain *domain, int addr_width); void intel_pasid_tear_down_entry(struct intel_iommu *iommu, - struct device *dev, int pasid); - + struct device *dev, int pasid, + bool fault_ignore); +int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid); +void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid); #endif /* __INTEL_PASID_H */ diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 2998418f0a38..a035ef911fba 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -23,6 +23,7 @@ #include "intel-pasid.h" static irqreturn_t prq_event_thread(int irq, void *d); +static void intel_svm_drain_prq(struct device *dev, int pasid); #define PRQ_ORDER 0 @@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER); + init_completion(&iommu->prq_complete); + return 0; } @@ -138,7 +141,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, svm->iommu); + qi_submit_sync(svm->iommu, &desc, 1, 0); if (sdev->dev_iotlb) { desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | @@ -162,7 +165,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(&desc, svm->iommu); + qi_submit_sync(svm->iommu, &desc, 1, 0); } } @@ -206,10 +209,9 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * *has* to handle gracefully without affecting other processes. */ rcu_read_lock(); - list_for_each_entry_rcu(sdev, &svm->devs, list) { - intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid); - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); - } + list_for_each_entry_rcu(sdev, &svm->devs, list) + intel_pasid_tear_down_entry(svm->iommu, sdev->dev, + svm->pasid, true); rcu_read_unlock(); } @@ -226,13 +228,212 @@ static LIST_HEAD(global_svm_list); list_for_each_entry((sdev), &(svm)->devs, list) \ if ((d) != (sdev)->dev) {} else -int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) +int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, + struct iommu_gpasid_bind_data *data) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct dmar_domain *dmar_domain; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; + + if (WARN_ON(!iommu) || !data) + return -EINVAL; + + if (data->version != IOMMU_GPASID_BIND_VERSION_1 || + data->format != IOMMU_PASID_FORMAT_INTEL_VTD) + return -EINVAL; + + if (!dev_is_pci(dev)) + return -ENOTSUPP; + + /* VT-d supports devices with full 20 bit PASIDs only */ + if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX) + return -EINVAL; + + /* + * We only check host PASID range, we have no knowledge to check + * guest PASID range. + */ + if (data->hpasid <= 0 || data->hpasid >= PASID_MAX) + return -EINVAL; + + dmar_domain = to_dmar_domain(domain); + + mutex_lock(&pasid_mutex); + svm = ioasid_find(NULL, data->hpasid, NULL); + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + if (svm) { + /* + * If we found svm for the PASID, there must be at + * least one device bond, otherwise svm should be freed. + */ + if (WARN_ON(list_empty(&svm->devs))) { + ret = -EINVAL; + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + /* + * For devices with aux domains, we should allow + * multiple bind calls with the same PASID and pdev. + */ + if (iommu_dev_feature_enabled(dev, + IOMMU_DEV_FEAT_AUX)) { + sdev->users++; + } else { + dev_warn_ratelimited(dev, + "Already bound with PASID %u\n", + svm->pasid); + ret = -EBUSY; + } + goto out; + } + } else { + /* We come here when PASID has never been bond to a device. */ + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) { + ret = -ENOMEM; + goto out; + } + /* REVISIT: upper layer/VFIO can track host process that bind + * the PASID. ioasid_set = mm might be sufficient for vfio to + * check pasid VMM ownership. We can drop the following line + * once VFIO and IOASID set check is in place. + */ + svm->mm = get_task_mm(current); + svm->pasid = data->hpasid; + if (data->flags & IOMMU_SVA_GPASID_VAL) { + svm->gpasid = data->gpasid; + svm->flags |= SVM_FLAG_GUEST_PASID; + } + ioasid_set_data(data->hpasid, svm); + INIT_LIST_HEAD_RCU(&svm->devs); + mmput(svm->mm); + } + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) { + ret = -ENOMEM; + goto out; + } + sdev->dev = dev; + + /* Only count users if device has aux domains */ + if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) + sdev->users = 1; + + /* Set up device context entry for PASID if not enabled already */ + ret = intel_iommu_enable_pasid(iommu, sdev->dev); + if (ret) { + dev_err_ratelimited(dev, "Failed to enable PASID capability\n"); + kfree(sdev); + goto out; + } + + /* + * PASID table is per device for better security. Therefore, for + * each bind of a new device even with an existing PASID, we need to + * call the nested mode setup function here. + */ + spin_lock(&iommu->lock); + ret = intel_pasid_setup_nested(iommu, dev, + (pgd_t *)(uintptr_t)data->gpgd, + data->hpasid, &data->vtd, dmar_domain, + data->addr_width); + spin_unlock(&iommu->lock); + if (ret) { + dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", + data->hpasid, ret); + /* + * PASID entry should be in cleared state if nested mode + * set up failed. So we only need to clear IOASID tracking + * data such that free call will succeed. + */ + kfree(sdev); + goto out; + } + + svm->flags |= SVM_FLAG_GUEST_MODE; + + init_rcu_head(&sdev->rcu); + list_add_rcu(&sdev->list, &svm->devs); + out: + if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { + ioasid_set_data(data->hpasid, NULL); + kfree(svm); + } + + mutex_unlock(&pasid_mutex); + return ret; +} + +int intel_svm_unbind_gpasid(struct device *dev, int pasid) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = -EINVAL; + + if (WARN_ON(!iommu)) + return -EINVAL; + + mutex_lock(&pasid_mutex); + svm = ioasid_find(NULL, pasid, NULL); + if (!svm) { + ret = -EINVAL; + goto out; + } + + if (IS_ERR(svm)) { + ret = PTR_ERR(svm); + goto out; + } + + for_each_svm_dev(sdev, svm, dev) { + ret = 0; + if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + intel_pasid_tear_down_entry(iommu, dev, + svm->pasid, false); + intel_svm_drain_prq(dev, svm->pasid); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + /* + * We do not free the IOASID here in that + * IOMMU driver did not allocate it. + * Unlike native SVM, IOASID for guest use was + * allocated prior to the bind call. + * In any case, if the free call comes before + * the unbind, IOMMU driver will get notified + * and perform cleanup. + */ + ioasid_set_data(pasid, NULL); + kfree(svm); + } + } + break; + } +out: + mutex_unlock(&pasid_mutex); + return ret; +} + +/* Caller must hold pasid_mutex, mm reference */ +static int +intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops, + struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; - struct mm_struct *mm = NULL; int pasid_max; int ret; @@ -249,16 +450,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } else pasid_max = 1 << 20; + /* Bind supervisor PASID shuld have mm = NULL */ if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap)) + if (!ecap_srs(iommu->ecap) || mm) { + pr_err("Supervisor PASID with user provided mm.\n"); return -EINVAL; - } else if (pasid) { - mm = get_task_mm(current); - BUG_ON(!mm); + } } - mutex_lock(&pasid_mutex); - if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { + if (!(flags & SVM_FLAG_PRIVATE_PASID)) { struct intel_svm *t; list_for_each_entry(t, &global_svm_list, list) { @@ -296,19 +496,12 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ sdev->dev = dev; ret = intel_iommu_enable_pasid(iommu, dev); - if (ret || !pasid) { - /* If they don't actually want to assign a PASID, this is - * just an enabling check/preparation. */ - kfree(sdev); - goto out; - } - - info = dev->archdata.iommu; - if (!info || !info->pasid_supported) { + if (ret) { kfree(sdev); goto out; } + info = get_domain_info(dev); sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); if (info->ats_enabled) { @@ -397,26 +590,24 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } } list_add_rcu(&sdev->list, &svm->devs); - - success: - *pasid = svm->pasid; +success: + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + if (sd) + *sd = sdev; ret = 0; out: - mutex_unlock(&pasid_mutex); - if (mm) - mmput(mm); return ret; } -EXPORT_SYMBOL_GPL(intel_svm_bind_mm); -int intel_svm_unbind_mm(struct device *dev, int pasid) +/* Caller must hold pasid_mutex */ +static int intel_svm_unbind_mm(struct device *dev, int pasid) { struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; int ret = -EINVAL; - mutex_lock(&pasid_mutex); iommu = intel_svm_device_to_iommu(dev); if (!iommu) goto out; @@ -442,8 +633,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) * to use. We have a *shared* PASID table, because it's * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. */ - intel_pasid_tear_down_entry(iommu, dev, svm->pasid); - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); + intel_pasid_tear_down_entry(iommu, dev, + svm->pasid, false); + intel_svm_drain_prq(dev, svm->pasid); kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { @@ -462,45 +654,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) break; } out: - mutex_unlock(&pasid_mutex); return ret; } -EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); - -int intel_svm_is_pasid_valid(struct device *dev, int pasid) -{ - struct intel_iommu *iommu; - struct intel_svm *svm; - int ret = -EINVAL; - - mutex_lock(&pasid_mutex); - iommu = intel_svm_device_to_iommu(dev); - if (!iommu) - goto out; - - svm = ioasid_find(NULL, pasid, NULL); - if (!svm) - goto out; - - if (IS_ERR(svm)) { - ret = PTR_ERR(svm); - goto out; - } - /* init_mm is used in this case */ - if (!svm->mm) - ret = 1; - else if (atomic_read(&svm->mm->mm_users) > 0) - ret = 1; - else - ret = 0; - - out: - mutex_unlock(&pasid_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); /* Page request queue descriptor */ struct page_req_dsc { @@ -557,6 +713,93 @@ static bool is_canonical_address(u64 addr) return (((saddr << shift) >> shift) == saddr); } +/** + * intel_svm_drain_prq - Drain page requests and responses for a pasid + * @dev: target device + * @pasid: pasid for draining + * + * Drain all pending page requests and responses related to @pasid in both + * software and hardware. This is supposed to be called after the device + * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB + * and DevTLB have been invalidated. + * + * It waits until all pending page requests for @pasid in the page fault + * queue are completed by the prq handling thread. Then follow the steps + * described in VT-d spec CH7.10 to drain all page requests and page + * responses pending in the hardware. + */ +static void intel_svm_drain_prq(struct device *dev, int pasid) +{ + struct device_domain_info *info; + struct dmar_domain *domain; + struct intel_iommu *iommu; + struct qi_desc desc[3]; + struct pci_dev *pdev; + int head, tail; + u16 sid, did; + int qdep; + + info = get_domain_info(dev); + if (WARN_ON(!info || !dev_is_pci(dev))) + return; + + if (!info->pri_enabled) + return; + + iommu = info->iommu; + domain = info->domain; + pdev = to_pci_dev(dev); + sid = PCI_DEVID(info->bus, info->devfn); + did = domain->iommu_did[iommu->seq_id]; + qdep = pci_ats_queue_depth(pdev); + + /* + * Check and wait until all pending page requests in the queue are + * handled by the prq handling thread. + */ +prq_retry: + reinit_completion(&iommu->prq_complete); + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + while (head != tail) { + struct page_req_dsc *req; + + req = &iommu->prq[head / sizeof(*req)]; + if (!req->pasid_present || req->pasid != pasid) { + head = (head + sizeof(*req)) & PRQ_RING_MASK; + continue; + } + + wait_for_completion(&iommu->prq_complete); + goto prq_retry; + } + + /* + * Perform steps described in VT-d spec CH7.10 to drain page + * requests and responses in hardware. + */ + memset(desc, 0, sizeof(desc)); + desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) | + QI_IWD_FENCE | + QI_IWD_TYPE; + desc[1].qw0 = QI_EIOTLB_PASID(pasid) | + QI_EIOTLB_DID(did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; + desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) | + QI_DEV_EIOTLB_SID(sid) | + QI_DEV_EIOTLB_QDEP(qdep) | + QI_DEIOTLB_TYPE | + QI_DEV_IOTLB_PFSID(info->pfsid); +qi_retry: + reinit_completion(&iommu->prq_complete); + qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN); + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { + wait_for_completion(&iommu->prq_complete); + goto qi_retry; + } +} + static irqreturn_t prq_event_thread(int irq, void *d) { struct intel_iommu *iommu = d; @@ -685,12 +928,75 @@ static irqreturn_t prq_event_thread(int irq, void *d) sizeof(req->priv_data)); resp.qw2 = 0; resp.qw3 = 0; - qi_submit_sync(&resp, iommu); + qi_submit_sync(iommu, &resp, 1, 0); } head = (head + sizeof(*req)) & PRQ_RING_MASK; } dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); + /* + * Clear the page request overflow bit and wake up all threads that + * are waiting for the completion of this handling. + */ + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + + if (!completion_done(&iommu->prq_complete)) + complete(&iommu->prq_complete); + return IRQ_RETVAL(handled); } + +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) +struct iommu_sva * +intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +{ + struct iommu_sva *sva = ERR_PTR(-EINVAL); + struct intel_svm_dev *sdev = NULL; + int flags = 0; + int ret; + + /* + * TODO: Consolidate with generic iommu-sva bind after it is merged. + * It will require shared SVM data structures, i.e. combine io_mm + * and intel_svm etc. + */ + if (drvdata) + flags = *(int *)drvdata; + mutex_lock(&pasid_mutex); + ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev); + if (ret) + sva = ERR_PTR(ret); + else if (sdev) + sva = &sdev->sva; + else + WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + + mutex_unlock(&pasid_mutex); + + return sva; +} + +void intel_svm_unbind(struct iommu_sva *sva) +{ + struct intel_svm_dev *sdev; + + mutex_lock(&pasid_mutex); + sdev = to_intel_svm_dev(sva); + intel_svm_unbind_mm(sdev->dev, sdev->pasid); + mutex_unlock(&pasid_mutex); +} + +int intel_svm_get_pasid(struct iommu_sva *sva) +{ + struct intel_svm_dev *sdev; + int pasid; + + mutex_lock(&pasid_mutex); + sdev = to_intel_svm_dev(sva); + pasid = sdev->pasid; + mutex_unlock(&pasid_mutex); + + return pasid; +} diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 81e43c1df7ec..a042f123b091 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -151,7 +151,7 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) desc.qw2 = 0; desc.qw3 = 0; - return qi_submit_sync(&desc, iommu); + return qi_submit_sync(iommu, &desc, 1, 0); } static int modify_irte(struct irq_2_iommu *irq_iommu, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 03d6a26687bc..d43120eb1dc5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -44,6 +44,7 @@ struct iommu_group { int id; struct iommu_domain *default_domain; struct iommu_domain *domain; + struct list_head entry; }; struct group_device { @@ -79,6 +80,20 @@ static bool iommu_cmd_line_dma_api(void) return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API); } +static int iommu_alloc_default_domain(struct iommu_group *group, + struct device *dev); +static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, + unsigned type); +static int __iommu_attach_device(struct iommu_domain *domain, + struct device *dev); +static int __iommu_attach_group(struct iommu_domain *domain, + struct iommu_group *group); +static void __iommu_detach_group(struct iommu_domain *domain, + struct iommu_group *group); +static int iommu_create_device_direct_mappings(struct iommu_group *group, + struct device *dev); +static struct iommu_group *iommu_group_get_for_dev(struct device *dev); + #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ __ATTR(_name, _mode, _show, _store) @@ -175,57 +190,118 @@ static void dev_iommu_free(struct device *dev) dev->iommu = NULL; } -int iommu_probe_device(struct device *dev) +static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops = dev->bus->iommu_ops; + struct iommu_device *iommu_dev; + struct iommu_group *group; int ret; - WARN_ON(dev->iommu_group); if (!ops) - return -EINVAL; + return -ENODEV; if (!dev_iommu_get(dev)) return -ENOMEM; if (!try_module_get(ops->owner)) { ret = -EINVAL; - goto err_free_dev_param; + goto err_free; } - ret = ops->add_device(dev); - if (ret) - goto err_module_put; + iommu_dev = ops->probe_device(dev); + if (IS_ERR(iommu_dev)) { + ret = PTR_ERR(iommu_dev); + goto out_module_put; + } + + dev->iommu->iommu_dev = iommu_dev; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto out_release; + } + iommu_group_put(group); + + if (group_list && !group->default_domain && list_empty(&group->entry)) + list_add_tail(&group->entry, group_list); + + iommu_device_link(iommu_dev, dev); return 0; -err_module_put: +out_release: + ops->release_device(dev); + +out_module_put: module_put(ops->owner); -err_free_dev_param: + +err_free: dev_iommu_free(dev); + return ret; } -void iommu_release_device(struct device *dev) +int iommu_probe_device(struct device *dev) { const struct iommu_ops *ops = dev->bus->iommu_ops; + struct iommu_group *group; + int ret; - if (dev->iommu_group) - ops->remove_device(dev); + ret = __iommu_probe_device(dev, NULL); + if (ret) + goto err_out; + + group = iommu_group_get(dev); + if (!group) + goto err_release; + + /* + * Try to allocate a default domain - needs support from the + * IOMMU driver. There are still some drivers which don't + * support default domains, so the return value is not yet + * checked. + */ + iommu_alloc_default_domain(group, dev); + + if (group->default_domain) + ret = __iommu_attach_device(group->default_domain, dev); + + iommu_create_device_direct_mappings(group, dev); + + iommu_group_put(group); + + if (ret) + goto err_release; + + if (ops->probe_finalize) + ops->probe_finalize(dev); + + return 0; + +err_release: + iommu_release_device(dev); + +err_out: + return ret; - if (dev->iommu) { - module_put(ops->owner); - dev_iommu_free(dev); - } } -static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, - unsigned type); -static int __iommu_attach_device(struct iommu_domain *domain, - struct device *dev); -static int __iommu_attach_group(struct iommu_domain *domain, - struct iommu_group *group); -static void __iommu_detach_group(struct iommu_domain *domain, - struct iommu_group *group); +void iommu_release_device(struct device *dev) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (!dev->iommu) + return; + + iommu_device_unlink(dev->iommu->iommu_dev, dev); + iommu_group_remove_device(dev); + + ops->release_device(dev); + + module_put(ops->owner); + dev_iommu_free(dev); +} static int __init iommu_set_def_domain_type(char *str) { @@ -497,6 +573,7 @@ struct iommu_group *iommu_group_alloc(void) group->kobj.kset = iommu_group_kset; mutex_init(&group->mutex); INIT_LIST_HEAD(&group->devices); + INIT_LIST_HEAD(&group->entry); BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL); @@ -638,8 +715,8 @@ int iommu_group_set_name(struct iommu_group *group, const char *name) } EXPORT_SYMBOL_GPL(iommu_group_set_name); -static int iommu_group_create_direct_mappings(struct iommu_group *group, - struct device *dev) +static int iommu_create_device_direct_mappings(struct iommu_group *group, + struct device *dev) { struct iommu_domain *domain = group->default_domain; struct iommu_resv_region *entry; @@ -752,8 +829,6 @@ rename: dev->iommu_group = group; - iommu_group_create_direct_mappings(group, dev); - mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); if (group->domain && !iommu_is_attach_deferred(group->domain, dev)) @@ -1371,6 +1446,61 @@ struct iommu_group *fsl_mc_device_group(struct device *dev) } EXPORT_SYMBOL_GPL(fsl_mc_device_group); +static int iommu_get_def_domain_type(struct device *dev) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + unsigned int type = 0; + + if (ops->def_domain_type) + type = ops->def_domain_type(dev); + + return (type == 0) ? iommu_def_domain_type : type; +} + +static int iommu_group_alloc_default_domain(struct bus_type *bus, + struct iommu_group *group, + unsigned int type) +{ + struct iommu_domain *dom; + + dom = __iommu_domain_alloc(bus, type); + if (!dom && type != IOMMU_DOMAIN_DMA) { + dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); + if (dom) + pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", + type, group->name); + } + + if (!dom) + return -ENOMEM; + + group->default_domain = dom; + if (!group->domain) + group->domain = dom; + + if (!iommu_dma_strict) { + int attr = 1; + iommu_domain_set_attr(dom, + DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, + &attr); + } + + return 0; +} + +static int iommu_alloc_default_domain(struct iommu_group *group, + struct device *dev) +{ + unsigned int type; + + if (group->default_domain) + return 0; + + type = iommu_get_def_domain_type(dev); + + return iommu_group_alloc_default_domain(dev->bus, group, type); +} + /** * iommu_group_get_for_dev - Find or create the IOMMU group for a device * @dev: target device @@ -1381,7 +1511,7 @@ EXPORT_SYMBOL_GPL(fsl_mc_device_group); * to the returned IOMMU group, which will already include the provided * device. The reference should be released with iommu_group_put(). */ -struct iommu_group *iommu_group_get_for_dev(struct device *dev) +static struct iommu_group *iommu_group_get_for_dev(struct device *dev) { const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; @@ -1401,59 +1531,37 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (IS_ERR(group)) return group; - /* - * Try to allocate a default domain - needs support from the - * IOMMU driver. - */ - if (!group->default_domain) { - struct iommu_domain *dom; - - dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type); - if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) { - dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA); - if (dom) { - dev_warn(dev, - "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA", - iommu_def_domain_type); - } - } - - group->default_domain = dom; - if (!group->domain) - group->domain = dom; - - if (dom && !iommu_dma_strict) { - int attr = 1; - iommu_domain_set_attr(dom, - DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, - &attr); - } - } - ret = iommu_group_add_device(group, dev); - if (ret) { - iommu_group_put(group); - return ERR_PTR(ret); - } + if (ret) + goto out_put_group; return group; + +out_put_group: + iommu_group_put(group); + + return ERR_PTR(ret); } -EXPORT_SYMBOL_GPL(iommu_group_get_for_dev); struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) { return group->default_domain; } -static int add_iommu_group(struct device *dev, void *data) +static int probe_iommu_group(struct device *dev, void *data) { - int ret = iommu_probe_device(dev); + struct list_head *group_list = data; + struct iommu_group *group; + int ret; - /* - * We ignore -ENODEV errors for now, as they just mean that the - * device is not translated by an IOMMU. We still care about - * other errors and fail to initialize when they happen. - */ + /* Device is probed already if in a group */ + group = iommu_group_get(dev); + if (group) { + iommu_group_put(group); + return 0; + } + + ret = __iommu_probe_device(dev, group_list); if (ret == -ENODEV) ret = 0; @@ -1519,10 +1627,152 @@ static int iommu_bus_notifier(struct notifier_block *nb, return 0; } +struct __group_domain_type { + struct device *dev; + unsigned int type; +}; + +static int probe_get_default_domain_type(struct device *dev, void *data) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + struct __group_domain_type *gtype = data; + unsigned int type = 0; + + if (ops->def_domain_type) + type = ops->def_domain_type(dev); + + if (type) { + if (gtype->type && gtype->type != type) { + dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", + iommu_domain_type_str(type), + dev_name(gtype->dev), + iommu_domain_type_str(gtype->type)); + gtype->type = 0; + } + + if (!gtype->dev) { + gtype->dev = dev; + gtype->type = type; + } + } + + return 0; +} + +static void probe_alloc_default_domain(struct bus_type *bus, + struct iommu_group *group) +{ + struct __group_domain_type gtype; + + memset(>ype, 0, sizeof(gtype)); + + /* Ask for default domain requirements of all devices in the group */ + __iommu_group_for_each_dev(group, >ype, + probe_get_default_domain_type); + + if (!gtype.type) + gtype.type = iommu_def_domain_type; + + iommu_group_alloc_default_domain(bus, group, gtype.type); + +} + +static int iommu_group_do_dma_attach(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + int ret = 0; + + if (!iommu_is_attach_deferred(domain, dev)) + ret = __iommu_attach_device(domain, dev); + + return ret; +} + +static int __iommu_group_dma_attach(struct iommu_group *group) +{ + return __iommu_group_for_each_dev(group, group->default_domain, + iommu_group_do_dma_attach); +} + +static int iommu_group_do_probe_finalize(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + + if (domain->ops->probe_finalize) + domain->ops->probe_finalize(dev); + + return 0; +} + +static void __iommu_group_dma_finalize(struct iommu_group *group) +{ + __iommu_group_for_each_dev(group, group->default_domain, + iommu_group_do_probe_finalize); +} + +static int iommu_do_create_direct_mappings(struct device *dev, void *data) +{ + struct iommu_group *group = data; + + iommu_create_device_direct_mappings(group, dev); + + return 0; +} + +static int iommu_group_create_direct_mappings(struct iommu_group *group) +{ + return __iommu_group_for_each_dev(group, group, + iommu_do_create_direct_mappings); +} + +int bus_iommu_probe(struct bus_type *bus) +{ + struct iommu_group *group, *next; + LIST_HEAD(group_list); + int ret; + + /* + * This code-path does not allocate the default domain when + * creating the iommu group, so do it after the groups are + * created. + */ + ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); + if (ret) + return ret; + + list_for_each_entry_safe(group, next, &group_list, entry) { + /* Remove item from the list */ + list_del_init(&group->entry); + + mutex_lock(&group->mutex); + + /* Try to allocate default domain */ + probe_alloc_default_domain(bus, group); + + if (!group->default_domain) { + mutex_unlock(&group->mutex); + continue; + } + + iommu_group_create_direct_mappings(group); + + ret = __iommu_group_dma_attach(group); + + mutex_unlock(&group->mutex); + + if (ret) + break; + + __iommu_group_dma_finalize(group); + } + + return ret; +} + static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) { - int err; struct notifier_block *nb; + int err; nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); if (!nb) @@ -1534,7 +1784,7 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) if (err) goto out_free; - err = bus_for_each_dev(bus, NULL, NULL, add_iommu_group); + err = bus_iommu_probe(bus); if (err) goto out_err; @@ -2301,71 +2551,6 @@ struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, } EXPORT_SYMBOL_GPL(iommu_alloc_resv_region); -static int -request_default_domain_for_dev(struct device *dev, unsigned long type) -{ - struct iommu_domain *domain; - struct iommu_group *group; - int ret; - - /* Device must already be in a group before calling this function */ - group = iommu_group_get(dev); - if (!group) - return -EINVAL; - - mutex_lock(&group->mutex); - - ret = 0; - if (group->default_domain && group->default_domain->type == type) - goto out; - - /* Don't change mappings of existing devices */ - ret = -EBUSY; - if (iommu_group_device_count(group) != 1) - goto out; - - ret = -ENOMEM; - domain = __iommu_domain_alloc(dev->bus, type); - if (!domain) - goto out; - - /* Attach the device to the domain */ - ret = __iommu_attach_group(domain, group); - if (ret) { - iommu_domain_free(domain); - goto out; - } - - /* Make the domain the default for this group */ - if (group->default_domain) - iommu_domain_free(group->default_domain); - group->default_domain = domain; - - iommu_group_create_direct_mappings(group, dev); - - dev_info(dev, "Using iommu %s mapping\n", - type == IOMMU_DOMAIN_DMA ? "dma" : "direct"); - - ret = 0; -out: - mutex_unlock(&group->mutex); - iommu_group_put(group); - - return ret; -} - -/* Request that a device is direct mapped by the IOMMU */ -int iommu_request_dm_for_dev(struct device *dev) -{ - return request_default_domain_for_dev(dev, IOMMU_DOMAIN_IDENTITY); -} - -/* Request that a device can't be direct mapped by the IOMMU */ -int iommu_request_dma_domain_for_dev(struct device *dev) -{ - return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA); -} - void iommu_set_default_passthrough(bool cmd_line) { if (cmd_line) @@ -2643,17 +2828,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) } EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); -int iommu_sva_set_ops(struct iommu_sva *handle, - const struct iommu_sva_ops *sva_ops) -{ - if (handle->ops && handle->ops != sva_ops) - return -EEXIST; - - handle->ops = sva_ops; - return 0; -} -EXPORT_SYMBOL_GPL(iommu_sva_set_ops); - int iommu_sva_get_pasid(struct iommu_sva *handle) { const struct iommu_ops *ops = handle->dev->bus->iommu_ops; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 0e6a9536eca6..49fc01f2a28d 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -253,7 +253,7 @@ int iova_cache_get(void) SLAB_HWCACHE_ALIGN, NULL); if (!iova_cache) { mutex_unlock(&iova_cache_mutex); - printk(KERN_ERR "Couldn't create iova cache\n"); + pr_err("Couldn't create iova cache\n"); return -ENOMEM; } } @@ -718,8 +718,8 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); if (!new_iova) - printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", - iova->pfn_lo, iova->pfn_lo); + pr_err("Reserve iova range %lx@%lx failed\n", + iova->pfn_lo, iova->pfn_lo); } spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 310cf09feea3..4c2972f3153b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -805,24 +805,8 @@ static int ipmmu_of_xlate(struct device *dev, static int ipmmu_init_arm_mapping(struct device *dev) { struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); - struct iommu_group *group; int ret; - /* Create a device group and add the device to it. */ - group = iommu_group_alloc(); - if (IS_ERR(group)) { - dev_err(dev, "Failed to allocate IOMMU group\n"); - return PTR_ERR(group); - } - - ret = iommu_group_add_device(group, dev); - iommu_group_put(group); - - if (ret < 0) { - dev_err(dev, "Failed to add device to IPMMU group\n"); - return ret; - } - /* * Create the ARM mapping, used by the ARM DMA mapping core to allocate * VAs. This will allocate a corresponding IOMMU domain. @@ -856,48 +840,39 @@ static int ipmmu_init_arm_mapping(struct device *dev) return 0; error: - iommu_group_remove_device(dev); if (mmu->mapping) arm_iommu_release_mapping(mmu->mapping); return ret; } -static int ipmmu_add_device(struct device *dev) +static struct iommu_device *ipmmu_probe_device(struct device *dev) { struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); - struct iommu_group *group; - int ret; /* * Only let through devices that have been verified in xlate() */ if (!mmu) - return -ENODEV; + return ERR_PTR(-ENODEV); - if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) { - ret = ipmmu_init_arm_mapping(dev); - if (ret) - return ret; - } else { - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); + return &mmu->iommu; +} - iommu_group_put(group); - } +static void ipmmu_probe_finalize(struct device *dev) +{ + int ret = 0; - iommu_device_link(&mmu->iommu, dev); - return 0; + if (IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)) + ret = ipmmu_init_arm_mapping(dev); + + if (ret) + dev_err(dev, "Can't create IOMMU mapping - DMA-OPS will not work\n"); } -static void ipmmu_remove_device(struct device *dev) +static void ipmmu_release_device(struct device *dev) { - struct ipmmu_vmsa_device *mmu = to_ipmmu(dev); - - iommu_device_unlink(&mmu->iommu, dev); arm_iommu_detach_device(dev); - iommu_group_remove_device(dev); } static struct iommu_group *ipmmu_find_group(struct device *dev) @@ -925,9 +900,11 @@ static const struct iommu_ops ipmmu_ops = { .flush_iotlb_all = ipmmu_flush_iotlb_all, .iotlb_sync = ipmmu_iotlb_sync, .iova_to_phys = ipmmu_iova_to_phys, - .add_device = ipmmu_add_device, - .remove_device = ipmmu_remove_device, - .device_group = ipmmu_find_group, + .probe_device = ipmmu_probe_device, + .release_device = ipmmu_release_device, + .probe_finalize = ipmmu_probe_finalize, + .device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA) + ? generic_device_group : ipmmu_find_group, .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, }; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 94a6df1bddd6..3d8a63555c25 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -34,7 +34,7 @@ __asm__ __volatile__ ( \ /* bitmap of the page sizes currently supported */ #define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) -DEFINE_SPINLOCK(msm_iommu_lock); +static DEFINE_SPINLOCK(msm_iommu_lock); static LIST_HEAD(qcom_iommu_devices); static struct iommu_ops msm_iommu_ops; @@ -388,43 +388,23 @@ static struct msm_iommu_dev *find_iommu_for_dev(struct device *dev) return ret; } -static int msm_iommu_add_device(struct device *dev) +static struct iommu_device *msm_iommu_probe_device(struct device *dev) { struct msm_iommu_dev *iommu; - struct iommu_group *group; unsigned long flags; spin_lock_irqsave(&msm_iommu_lock, flags); iommu = find_iommu_for_dev(dev); spin_unlock_irqrestore(&msm_iommu_lock, flags); - if (iommu) - iommu_device_link(&iommu->iommu, dev); - else - return -ENODEV; - - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); - - iommu_group_put(group); + if (!iommu) + return ERR_PTR(-ENODEV); - return 0; + return &iommu->iommu; } -static void msm_iommu_remove_device(struct device *dev) +static void msm_iommu_release_device(struct device *dev) { - struct msm_iommu_dev *iommu; - unsigned long flags; - - spin_lock_irqsave(&msm_iommu_lock, flags); - iommu = find_iommu_for_dev(dev); - spin_unlock_irqrestore(&msm_iommu_lock, flags); - - if (iommu) - iommu_device_unlink(&iommu->iommu, dev); - - iommu_group_remove_device(dev); } static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -708,8 +688,8 @@ static struct iommu_ops msm_iommu_ops = { */ .iotlb_sync = NULL, .iova_to_phys = msm_iommu_iova_to_phys, - .add_device = msm_iommu_add_device, - .remove_device = msm_iommu_remove_device, + .probe_device = msm_iommu_probe_device, + .release_device = msm_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 5f4d6df59cf6..2be96f1cdbd2 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -441,38 +441,26 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, return pa; } -static int mtk_iommu_add_device(struct device *dev) +static struct iommu_device *mtk_iommu_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_data *data; - struct iommu_group *group; if (!fwspec || fwspec->ops != &mtk_iommu_ops) - return -ENODEV; /* Not a iommu client device */ + return ERR_PTR(-ENODEV); /* Not a iommu client device */ data = dev_iommu_priv_get(dev); - iommu_device_link(&data->iommu, dev); - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); - - iommu_group_put(group); - return 0; + return &data->iommu; } -static void mtk_iommu_remove_device(struct device *dev) +static void mtk_iommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct mtk_iommu_data *data; if (!fwspec || fwspec->ops != &mtk_iommu_ops) return; - data = dev_iommu_priv_get(dev); - iommu_device_unlink(&data->iommu, dev); - - iommu_group_remove_device(dev); iommu_fwspec_free(dev); } @@ -526,8 +514,8 @@ static const struct iommu_ops mtk_iommu_ops = { .flush_iotlb_all = mtk_iommu_flush_iotlb_all, .iotlb_sync = mtk_iommu_iotlb_sync, .iova_to_phys = mtk_iommu_iova_to_phys, - .add_device = mtk_iommu_add_device, - .remove_device = mtk_iommu_remove_device, + .probe_device = mtk_iommu_probe_device, + .release_device = mtk_iommu_release_device, .device_group = mtk_iommu_device_group, .of_xlate = mtk_iommu_of_xlate, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index a31be05601c9..c9d79cff4d17 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -265,10 +265,13 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct dma_iommu_mapping *mtk_mapping; int ret; - if (!data) - return -ENODEV; + /* Only allow the domain created internally. */ + mtk_mapping = data->dev->archdata.iommu; + if (mtk_mapping->domain != domain) + return 0; if (!data->m4u_dom) { data->m4u_dom = dom; @@ -288,9 +291,6 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain, { struct mtk_iommu_data *data = dev_iommu_priv_get(dev); - if (!data) - return; - mtk_iommu_config(data, dev, false); } @@ -416,14 +416,17 @@ static int mtk_iommu_create_mapping(struct device *dev, return 0; } -static int mtk_iommu_add_device(struct device *dev) +static int mtk_iommu_def_domain_type(struct device *dev) +{ + return IOMMU_DOMAIN_UNMANAGED; +} + +static struct iommu_device *mtk_iommu_probe_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct dma_iommu_mapping *mtk_mapping; struct of_phandle_args iommu_spec; struct of_phandle_iterator it; struct mtk_iommu_data *data; - struct iommu_group *group; int err; of_for_each_phandle(&it, err, dev->of_node, "iommus", @@ -442,46 +445,34 @@ static int mtk_iommu_add_device(struct device *dev) } if (!fwspec || fwspec->ops != &mtk_iommu_ops) - return -ENODEV; /* Not a iommu client device */ + return ERR_PTR(-ENODEV); /* Not a iommu client device */ - /* - * This is a short-term bodge because the ARM DMA code doesn't - * understand multi-device groups, but we have to call into it - * successfully (and not just rely on a normal IOMMU API attach - * here) in order to set the correct DMA API ops on @dev. - */ - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); + data = dev_iommu_priv_get(dev); - err = iommu_group_add_device(group, dev); - iommu_group_put(group); - if (err) - return err; + return &data->iommu; +} - data = dev_iommu_priv_get(dev); +static void mtk_iommu_probe_finalize(struct device *dev) +{ + struct dma_iommu_mapping *mtk_mapping; + struct mtk_iommu_data *data; + int err; + + data = dev_iommu_priv_get(dev); mtk_mapping = data->dev->archdata.iommu; - err = arm_iommu_attach_device(dev, mtk_mapping); - if (err) { - iommu_group_remove_device(dev); - return err; - } - return iommu_device_link(&data->iommu, dev); + err = arm_iommu_attach_device(dev, mtk_mapping); + if (err) + dev_err(dev, "Can't create IOMMU mapping - DMA-OPS will not work\n"); } -static void mtk_iommu_remove_device(struct device *dev) +static void mtk_iommu_release_device(struct device *dev) { struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct mtk_iommu_data *data; if (!fwspec || fwspec->ops != &mtk_iommu_ops) return; - data = dev_iommu_priv_get(dev); - iommu_device_unlink(&data->iommu, dev); - - iommu_group_remove_device(dev); iommu_fwspec_free(dev); } @@ -534,8 +525,11 @@ static const struct iommu_ops mtk_iommu_ops = { .map = mtk_iommu_map, .unmap = mtk_iommu_unmap, .iova_to_phys = mtk_iommu_iova_to_phys, - .add_device = mtk_iommu_add_device, - .remove_device = mtk_iommu_remove_device, + .probe_device = mtk_iommu_probe_device, + .probe_finalize = mtk_iommu_probe_finalize, + .release_device = mtk_iommu_release_device, + .def_domain_type = mtk_iommu_def_domain_type, + .device_group = generic_device_group, .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, }; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 887fefcb03b4..c8282cc212cb 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -35,15 +35,6 @@ static const struct iommu_ops omap_iommu_ops; -struct orphan_dev { - struct device *dev; - struct list_head node; -}; - -static LIST_HEAD(orphan_dev_list); - -static DEFINE_SPINLOCK(orphan_lock); - #define to_iommu(dev) ((struct omap_iommu *)dev_get_drvdata(dev)) /* bitmap of the page sizes currently supported */ @@ -62,8 +53,6 @@ static DEFINE_SPINLOCK(orphan_lock); static struct platform_driver omap_iommu_driver; static struct kmem_cache *iopte_cachep; -static int _omap_iommu_add_device(struct device *dev); - /** * to_omap_domain - Get struct omap_iommu_domain from generic iommu_domain * @dom: generic iommu domain handle @@ -1177,7 +1166,6 @@ static int omap_iommu_probe(struct platform_device *pdev) struct omap_iommu *obj; struct resource *res; struct device_node *of = pdev->dev.of_node; - struct orphan_dev *orphan_dev, *tmp; if (!of) { pr_err("%s: only DT-based devices are supported\n", __func__); @@ -1248,6 +1236,7 @@ static int omap_iommu_probe(struct platform_device *pdev) goto out_group; iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); + iommu_device_set_fwnode(&obj->iommu, &of->fwnode); err = iommu_device_register(&obj->iommu); if (err) @@ -1260,13 +1249,8 @@ static int omap_iommu_probe(struct platform_device *pdev) dev_info(&pdev->dev, "%s registered\n", obj->name); - list_for_each_entry_safe(orphan_dev, tmp, &orphan_dev_list, node) { - err = _omap_iommu_add_device(orphan_dev->dev); - if (!err) { - list_del(&orphan_dev->node); - kfree(orphan_dev); - } - } + /* Re-probe bus to probe device attached to this IOMMU */ + bus_iommu_probe(&platform_bus_type); return 0; @@ -1657,17 +1641,13 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, return ret; } -static int _omap_iommu_add_device(struct device *dev) +static struct iommu_device *omap_iommu_probe_device(struct device *dev) { struct omap_iommu_arch_data *arch_data, *tmp; + struct platform_device *pdev; struct omap_iommu *oiommu; - struct iommu_group *group; struct device_node *np; - struct platform_device *pdev; int num_iommus, i; - int ret; - struct orphan_dev *orphan_dev; - unsigned long flags; /* * Allocate the archdata iommu structure for DT-based devices. @@ -1676,7 +1656,7 @@ static int _omap_iommu_add_device(struct device *dev) * IOMMU users. */ if (!dev->of_node) - return 0; + return ERR_PTR(-ENODEV); /* * retrieve the count of IOMMU nodes using phandle size as element size @@ -1689,43 +1669,27 @@ static int _omap_iommu_add_device(struct device *dev) arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL); if (!arch_data) - return -ENOMEM; + return ERR_PTR(-ENOMEM); for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) { np = of_parse_phandle(dev->of_node, "iommus", i); if (!np) { kfree(arch_data); - return -EINVAL; + return ERR_PTR(-EINVAL); } pdev = of_find_device_by_node(np); if (!pdev) { of_node_put(np); kfree(arch_data); - spin_lock_irqsave(&orphan_lock, flags); - list_for_each_entry(orphan_dev, &orphan_dev_list, - node) { - if (orphan_dev->dev == dev) - break; - } - spin_unlock_irqrestore(&orphan_lock, flags); - - if (orphan_dev && orphan_dev->dev == dev) - return -EPROBE_DEFER; - - orphan_dev = kzalloc(sizeof(*orphan_dev), GFP_KERNEL); - orphan_dev->dev = dev; - spin_lock_irqsave(&orphan_lock, flags); - list_add(&orphan_dev->node, &orphan_dev_list); - spin_unlock_irqrestore(&orphan_lock, flags); - return -EPROBE_DEFER; + return ERR_PTR(-ENODEV); } oiommu = platform_get_drvdata(pdev); if (!oiommu) { of_node_put(np); kfree(arch_data); - return -EINVAL; + return ERR_PTR(-EINVAL); } tmp->iommu_dev = oiommu; @@ -1734,57 +1698,25 @@ static int _omap_iommu_add_device(struct device *dev) of_node_put(np); } + dev->archdata.iommu = arch_data; + /* * use the first IOMMU alone for the sysfs device linking. * TODO: Evaluate if a single iommu_group needs to be * maintained for both IOMMUs */ oiommu = arch_data->iommu_dev; - ret = iommu_device_link(&oiommu->iommu, dev); - if (ret) { - kfree(arch_data); - return ret; - } - - dev->archdata.iommu = arch_data; - - /* - * IOMMU group initialization calls into omap_iommu_device_group, which - * needs a valid dev->archdata.iommu pointer - */ - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) { - iommu_device_unlink(&oiommu->iommu, dev); - dev->archdata.iommu = NULL; - kfree(arch_data); - return PTR_ERR(group); - } - iommu_group_put(group); - return 0; + return &oiommu->iommu; } -static int omap_iommu_add_device(struct device *dev) -{ - int ret; - - ret = _omap_iommu_add_device(dev); - if (ret == -EPROBE_DEFER) - return 0; - - return ret; -} - -static void omap_iommu_remove_device(struct device *dev) +static void omap_iommu_release_device(struct device *dev) { struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; if (!dev->of_node || !arch_data) return; - iommu_device_unlink(&arch_data->iommu_dev->iommu, dev); - iommu_group_remove_device(dev); - dev->archdata.iommu = NULL; kfree(arch_data); @@ -1795,6 +1727,9 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev) struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; struct iommu_group *group = ERR_PTR(-EINVAL); + if (!arch_data) + return ERR_PTR(-ENODEV); + if (arch_data->iommu_dev) group = iommu_group_ref_get(arch_data->iommu_dev->group); @@ -1809,8 +1744,8 @@ static const struct iommu_ops omap_iommu_ops = { .map = omap_iommu_map, .unmap = omap_iommu_unmap, .iova_to_phys = omap_iommu_iova_to_phys, - .add_device = omap_iommu_add_device, - .remove_device = omap_iommu_remove_device, + .probe_device = omap_iommu_probe_device, + .release_device = omap_iommu_release_device, .device_group = omap_iommu_device_group, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 5b3b270972f8..c3e1fbd1988c 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -524,14 +524,13 @@ static bool qcom_iommu_capable(enum iommu_cap cap) } } -static int qcom_iommu_add_device(struct device *dev) +static struct iommu_device *qcom_iommu_probe_device(struct device *dev) { struct qcom_iommu_dev *qcom_iommu = to_iommu(dev); - struct iommu_group *group; struct device_link *link; if (!qcom_iommu) - return -ENODEV; + return ERR_PTR(-ENODEV); /* * Establish the link between iommu and master, so that the @@ -542,28 +541,19 @@ static int qcom_iommu_add_device(struct device *dev) if (!link) { dev_err(qcom_iommu->dev, "Unable to create device link between %s and %s\n", dev_name(qcom_iommu->dev), dev_name(dev)); - return -ENODEV; + return ERR_PTR(-ENODEV); } - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); - - iommu_group_put(group); - iommu_device_link(&qcom_iommu->iommu, dev); - - return 0; + return &qcom_iommu->iommu; } -static void qcom_iommu_remove_device(struct device *dev) +static void qcom_iommu_release_device(struct device *dev) { struct qcom_iommu_dev *qcom_iommu = to_iommu(dev); if (!qcom_iommu) return; - iommu_device_unlink(&qcom_iommu->iommu, dev); - iommu_group_remove_device(dev); iommu_fwspec_free(dev); } @@ -619,8 +609,8 @@ static const struct iommu_ops qcom_iommu_ops = { .flush_iotlb_all = qcom_iommu_flush_iotlb_all, .iotlb_sync = qcom_iommu_iotlb_sync, .iova_to_phys = qcom_iommu_iova_to_phys, - .add_device = qcom_iommu_add_device, - .remove_device = qcom_iommu_remove_device, + .probe_device = qcom_iommu_probe_device, + .release_device = qcom_iommu_release_device, .device_group = generic_device_group, .of_xlate = qcom_iommu_of_xlate, .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index b33cdd5aad81..d25c2486ca07 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1054,40 +1054,28 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) kfree(rk_domain); } -static int rk_iommu_add_device(struct device *dev) +static struct iommu_device *rk_iommu_probe_device(struct device *dev) { - struct iommu_group *group; - struct rk_iommu *iommu; struct rk_iommudata *data; + struct rk_iommu *iommu; data = dev->archdata.iommu; if (!data) - return -ENODEV; + return ERR_PTR(-ENODEV); iommu = rk_iommu_from_dev(dev); - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); - iommu_group_put(group); - - iommu_device_link(&iommu->iommu, dev); data->link = device_link_add(dev, iommu->dev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); - return 0; + return &iommu->iommu; } -static void rk_iommu_remove_device(struct device *dev) +static void rk_iommu_release_device(struct device *dev) { - struct rk_iommu *iommu; struct rk_iommudata *data = dev->archdata.iommu; - iommu = rk_iommu_from_dev(dev); - device_link_del(data->link); - iommu_device_unlink(&iommu->iommu, dev); - iommu_group_remove_device(dev); } static struct iommu_group *rk_iommu_device_group(struct device *dev) @@ -1126,8 +1114,8 @@ static const struct iommu_ops rk_iommu_ops = { .detach_dev = rk_iommu_detach_device, .map = rk_iommu_map, .unmap = rk_iommu_unmap, - .add_device = rk_iommu_add_device, - .remove_device = rk_iommu_remove_device, + .probe_device = rk_iommu_probe_device, + .release_device = rk_iommu_release_device, .iova_to_phys = rk_iommu_iova_to_phys, .device_group = rk_iommu_device_group, .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 1137f3ddcb85..610f0828f22d 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -166,21 +166,14 @@ static void s390_iommu_detach_device(struct iommu_domain *domain, } } -static int s390_iommu_add_device(struct device *dev) +static struct iommu_device *s390_iommu_probe_device(struct device *dev) { - struct iommu_group *group = iommu_group_get_for_dev(dev); struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; - if (IS_ERR(group)) - return PTR_ERR(group); - - iommu_group_put(group); - iommu_device_link(&zdev->iommu_dev, dev); - - return 0; + return &zdev->iommu_dev; } -static void s390_iommu_remove_device(struct device *dev) +static void s390_iommu_release_device(struct device *dev) { struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; struct iommu_domain *domain; @@ -191,7 +184,7 @@ static void s390_iommu_remove_device(struct device *dev) * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers * the attach_dev), removing the device via * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev, - * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE + * only release_device will be called via the BUS_NOTIFY_REMOVED_DEVICE * notifier. * * So let's call detach_dev from here if it hasn't been called before. @@ -201,9 +194,6 @@ static void s390_iommu_remove_device(struct device *dev) if (domain) s390_iommu_detach_device(domain, dev); } - - iommu_device_unlink(&zdev->iommu_dev, dev); - iommu_group_remove_device(dev); } static int s390_iommu_update_trans(struct s390_domain *s390_domain, @@ -373,8 +363,8 @@ static const struct iommu_ops s390_iommu_ops = { .map = s390_iommu_map, .unmap = s390_iommu_unmap, .iova_to_phys = s390_iommu_iova_to_phys, - .add_device = s390_iommu_add_device, - .remove_device = s390_iommu_remove_device, + .probe_device = s390_iommu_probe_device, + .release_device = s390_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c new file mode 100644 index 000000000000..fce605e96aa2 --- /dev/null +++ b/drivers/iommu/sun50i-iommu.c @@ -0,0 +1,1023 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (C) 2016-2018, Allwinner Technology CO., LTD. +// Copyright (C) 2019-2020, Cerno + +#include <linux/bitfield.h> +#include <linux/bug.h> +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/dma-direction.h> +#include <linux/dma-iommu.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/iommu.h> +#include <linux/iopoll.h> +#include <linux/ioport.h> +#include <linux/log2.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/pm_runtime.h> +#include <linux/reset.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#define IOMMU_RESET_REG 0x010 +#define IOMMU_ENABLE_REG 0x020 +#define IOMMU_ENABLE_ENABLE BIT(0) + +#define IOMMU_BYPASS_REG 0x030 +#define IOMMU_AUTO_GATING_REG 0x040 +#define IOMMU_AUTO_GATING_ENABLE BIT(0) + +#define IOMMU_WBUF_CTRL_REG 0x044 +#define IOMMU_OOO_CTRL_REG 0x048 +#define IOMMU_4KB_BDY_PRT_CTRL_REG 0x04c +#define IOMMU_TTB_REG 0x050 +#define IOMMU_TLB_ENABLE_REG 0x060 +#define IOMMU_TLB_PREFETCH_REG 0x070 +#define IOMMU_TLB_PREFETCH_MASTER_ENABLE(m) BIT(m) + +#define IOMMU_TLB_FLUSH_REG 0x080 +#define IOMMU_TLB_FLUSH_PTW_CACHE BIT(17) +#define IOMMU_TLB_FLUSH_MACRO_TLB BIT(16) +#define IOMMU_TLB_FLUSH_MICRO_TLB(i) (BIT(i) & GENMASK(5, 0)) + +#define IOMMU_TLB_IVLD_ADDR_REG 0x090 +#define IOMMU_TLB_IVLD_ADDR_MASK_REG 0x094 +#define IOMMU_TLB_IVLD_ENABLE_REG 0x098 +#define IOMMU_TLB_IVLD_ENABLE_ENABLE BIT(0) + +#define IOMMU_PC_IVLD_ADDR_REG 0x0a0 +#define IOMMU_PC_IVLD_ENABLE_REG 0x0a8 +#define IOMMU_PC_IVLD_ENABLE_ENABLE BIT(0) + +#define IOMMU_DM_AUT_CTRL_REG(d) (0x0b0 + ((d) / 2) * 4) +#define IOMMU_DM_AUT_CTRL_RD_UNAVAIL(d, m) (1 << (((d & 1) * 16) + ((m) * 2))) +#define IOMMU_DM_AUT_CTRL_WR_UNAVAIL(d, m) (1 << (((d & 1) * 16) + ((m) * 2) + 1)) + +#define IOMMU_DM_AUT_OVWT_REG 0x0d0 +#define IOMMU_INT_ENABLE_REG 0x100 +#define IOMMU_INT_CLR_REG 0x104 +#define IOMMU_INT_STA_REG 0x108 +#define IOMMU_INT_ERR_ADDR_REG(i) (0x110 + (i) * 4) +#define IOMMU_INT_ERR_ADDR_L1_REG 0x130 +#define IOMMU_INT_ERR_ADDR_L2_REG 0x134 +#define IOMMU_INT_ERR_DATA_REG(i) (0x150 + (i) * 4) +#define IOMMU_L1PG_INT_REG 0x0180 +#define IOMMU_L2PG_INT_REG 0x0184 + +#define IOMMU_INT_INVALID_L2PG BIT(17) +#define IOMMU_INT_INVALID_L1PG BIT(16) +#define IOMMU_INT_MASTER_PERMISSION(m) BIT(m) +#define IOMMU_INT_MASTER_MASK (IOMMU_INT_MASTER_PERMISSION(0) | \ + IOMMU_INT_MASTER_PERMISSION(1) | \ + IOMMU_INT_MASTER_PERMISSION(2) | \ + IOMMU_INT_MASTER_PERMISSION(3) | \ + IOMMU_INT_MASTER_PERMISSION(4) | \ + IOMMU_INT_MASTER_PERMISSION(5)) +#define IOMMU_INT_MASK (IOMMU_INT_INVALID_L1PG | \ + IOMMU_INT_INVALID_L2PG | \ + IOMMU_INT_MASTER_MASK) + +#define PT_ENTRY_SIZE sizeof(u32) + +#define NUM_DT_ENTRIES 4096 +#define DT_SIZE (NUM_DT_ENTRIES * PT_ENTRY_SIZE) + +#define NUM_PT_ENTRIES 256 +#define PT_SIZE (NUM_PT_ENTRIES * PT_ENTRY_SIZE) + +struct sun50i_iommu { + struct iommu_device iommu; + + /* Lock to modify the IOMMU registers */ + spinlock_t iommu_lock; + + struct device *dev; + void __iomem *base; + struct reset_control *reset; + struct clk *clk; + + struct iommu_domain *domain; + struct iommu_group *group; + struct kmem_cache *pt_pool; +}; + +struct sun50i_iommu_domain { + struct iommu_domain domain; + + /* Number of devices attached to the domain */ + refcount_t refcnt; + + /* L1 Page Table */ + u32 *dt; + dma_addr_t dt_dma; + + struct sun50i_iommu *iommu; +}; + +static struct sun50i_iommu_domain *to_sun50i_domain(struct iommu_domain *domain) +{ + return container_of(domain, struct sun50i_iommu_domain, domain); +} + +static struct sun50i_iommu *sun50i_iommu_from_dev(struct device *dev) +{ + return dev_iommu_priv_get(dev); +} + +static u32 iommu_read(struct sun50i_iommu *iommu, u32 offset) +{ + return readl(iommu->base + offset); +} + +static void iommu_write(struct sun50i_iommu *iommu, u32 offset, u32 value) +{ + writel(value, iommu->base + offset); +} + +/* + * The Allwinner H6 IOMMU uses a 2-level page table. + * + * The first level is the usual Directory Table (DT), that consists of + * 4096 4-bytes Directory Table Entries (DTE), each pointing to a Page + * Table (PT). + * + * Each PT consits of 256 4-bytes Page Table Entries (PTE), each + * pointing to a 4kB page of physical memory. + * + * The IOMMU supports a single DT, pointed by the IOMMU_TTB_REG + * register that contains its physical address. + */ + +#define SUN50I_IOVA_DTE_MASK GENMASK(31, 20) +#define SUN50I_IOVA_PTE_MASK GENMASK(19, 12) +#define SUN50I_IOVA_PAGE_MASK GENMASK(11, 0) + +static u32 sun50i_iova_get_dte_index(dma_addr_t iova) +{ + return FIELD_GET(SUN50I_IOVA_DTE_MASK, iova); +} + +static u32 sun50i_iova_get_pte_index(dma_addr_t iova) +{ + return FIELD_GET(SUN50I_IOVA_PTE_MASK, iova); +} + +static u32 sun50i_iova_get_page_offset(dma_addr_t iova) +{ + return FIELD_GET(SUN50I_IOVA_PAGE_MASK, iova); +} + +/* + * Each Directory Table Entry has a Page Table address and a valid + * bit: + + * +---------------------+-----------+-+ + * | PT address | Reserved |V| + * +---------------------+-----------+-+ + * 31:10 - Page Table address + * 9:2 - Reserved + * 1:0 - 1 if the entry is valid + */ + +#define SUN50I_DTE_PT_ADDRESS_MASK GENMASK(31, 10) +#define SUN50I_DTE_PT_ATTRS GENMASK(1, 0) +#define SUN50I_DTE_PT_VALID 1 + +static phys_addr_t sun50i_dte_get_pt_address(u32 dte) +{ + return (phys_addr_t)dte & SUN50I_DTE_PT_ADDRESS_MASK; +} + +static bool sun50i_dte_is_pt_valid(u32 dte) +{ + return (dte & SUN50I_DTE_PT_ATTRS) == SUN50I_DTE_PT_VALID; +} + +static u32 sun50i_mk_dte(dma_addr_t pt_dma) +{ + return (pt_dma & SUN50I_DTE_PT_ADDRESS_MASK) | SUN50I_DTE_PT_VALID; +} + +/* + * Each PTE has a Page address, an authority index and a valid bit: + * + * +----------------+-----+-----+-----+---+-----+ + * | Page address | Rsv | ACI | Rsv | V | Rsv | + * +----------------+-----+-----+-----+---+-----+ + * 31:12 - Page address + * 11:8 - Reserved + * 7:4 - Authority Control Index + * 3:2 - Reserved + * 1 - 1 if the entry is valid + * 0 - Reserved + * + * The way permissions work is that the IOMMU has 16 "domains" that + * can be configured to give each masters either read or write + * permissions through the IOMMU_DM_AUT_CTRL_REG registers. The domain + * 0 seems like the default domain, and its permissions in the + * IOMMU_DM_AUT_CTRL_REG are only read-only, so it's not really + * useful to enforce any particular permission. + * + * Each page entry will then have a reference to the domain they are + * affected to, so that we can actually enforce them on a per-page + * basis. + * + * In order to make it work with the IOMMU framework, we will be using + * 4 different domains, starting at 1: RD_WR, RD, WR and NONE + * depending on the permission we want to enforce. Each domain will + * have each master setup in the same way, since the IOMMU framework + * doesn't seem to restrict page access on a per-device basis. And + * then we will use the relevant domain index when generating the page + * table entry depending on the permissions we want to be enforced. + */ + +enum sun50i_iommu_aci { + SUN50I_IOMMU_ACI_DO_NOT_USE = 0, + SUN50I_IOMMU_ACI_NONE, + SUN50I_IOMMU_ACI_RD, + SUN50I_IOMMU_ACI_WR, + SUN50I_IOMMU_ACI_RD_WR, +}; + +#define SUN50I_PTE_PAGE_ADDRESS_MASK GENMASK(31, 12) +#define SUN50I_PTE_ACI_MASK GENMASK(7, 4) +#define SUN50I_PTE_PAGE_VALID BIT(1) + +static phys_addr_t sun50i_pte_get_page_address(u32 pte) +{ + return (phys_addr_t)pte & SUN50I_PTE_PAGE_ADDRESS_MASK; +} + +static enum sun50i_iommu_aci sun50i_get_pte_aci(u32 pte) +{ + return FIELD_GET(SUN50I_PTE_ACI_MASK, pte); +} + +static bool sun50i_pte_is_page_valid(u32 pte) +{ + return pte & SUN50I_PTE_PAGE_VALID; +} + +static u32 sun50i_mk_pte(phys_addr_t page, int prot) +{ + enum sun50i_iommu_aci aci; + u32 flags = 0; + + if (prot & (IOMMU_READ | IOMMU_WRITE)) + aci = SUN50I_IOMMU_ACI_RD_WR; + else if (prot & IOMMU_READ) + aci = SUN50I_IOMMU_ACI_RD; + else if (prot & IOMMU_WRITE) + aci = SUN50I_IOMMU_ACI_WR; + else + aci = SUN50I_IOMMU_ACI_NONE; + + flags |= FIELD_PREP(SUN50I_PTE_ACI_MASK, aci); + page &= SUN50I_PTE_PAGE_ADDRESS_MASK; + return page | flags | SUN50I_PTE_PAGE_VALID; +} + +static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain, + void *vaddr, unsigned int count) +{ + struct sun50i_iommu *iommu = sun50i_domain->iommu; + dma_addr_t dma = virt_to_phys(vaddr); + size_t size = count * PT_ENTRY_SIZE; + + dma_sync_single_for_device(iommu->dev, dma, size, DMA_TO_DEVICE); +} + +static int sun50i_iommu_flush_all_tlb(struct sun50i_iommu *iommu) +{ + u32 reg; + int ret; + + assert_spin_locked(&iommu->iommu_lock); + + iommu_write(iommu, + IOMMU_TLB_FLUSH_REG, + IOMMU_TLB_FLUSH_PTW_CACHE | + IOMMU_TLB_FLUSH_MACRO_TLB | + IOMMU_TLB_FLUSH_MICRO_TLB(5) | + IOMMU_TLB_FLUSH_MICRO_TLB(4) | + IOMMU_TLB_FLUSH_MICRO_TLB(3) | + IOMMU_TLB_FLUSH_MICRO_TLB(2) | + IOMMU_TLB_FLUSH_MICRO_TLB(1) | + IOMMU_TLB_FLUSH_MICRO_TLB(0)); + + ret = readl_poll_timeout(iommu->base + IOMMU_TLB_FLUSH_REG, + reg, !reg, + 1, 2000); + if (ret) + dev_warn(iommu->dev, "TLB Flush timed out!\n"); + + return ret; +} + +static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu = sun50i_domain->iommu; + unsigned long flags; + + /* + * At boot, we'll have a first call into .flush_iotlb_all right after + * .probe_device, and since we link our (single) domain to our iommu in + * the .attach_device callback, we don't have that pointer set. + * + * It shouldn't really be any trouble to ignore it though since we flush + * all caches as part of the device powerup. + */ + if (!iommu) + return; + + spin_lock_irqsave(&iommu->iommu_lock, flags); + sun50i_iommu_flush_all_tlb(iommu); + spin_unlock_irqrestore(&iommu->iommu_lock, flags); +} + +static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + sun50i_iommu_flush_iotlb_all(domain); +} + +static int sun50i_iommu_enable(struct sun50i_iommu *iommu) +{ + struct sun50i_iommu_domain *sun50i_domain; + unsigned long flags; + int ret; + + if (!iommu->domain) + return 0; + + sun50i_domain = to_sun50i_domain(iommu->domain); + + ret = reset_control_deassert(iommu->reset); + if (ret) + return ret; + + ret = clk_prepare_enable(iommu->clk); + if (ret) + goto err_reset_assert; + + spin_lock_irqsave(&iommu->iommu_lock, flags); + + iommu_write(iommu, IOMMU_TTB_REG, sun50i_domain->dt_dma); + iommu_write(iommu, IOMMU_TLB_PREFETCH_REG, + IOMMU_TLB_PREFETCH_MASTER_ENABLE(0) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(1) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(2) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(3) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(4) | + IOMMU_TLB_PREFETCH_MASTER_ENABLE(5)); + iommu_write(iommu, IOMMU_INT_ENABLE_REG, IOMMU_INT_MASK); + iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_NONE), + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5)); + + iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_RD), + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 0) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 1) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 2) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 3) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 4) | + IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 5)); + + iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_WR), + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 0) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 1) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 2) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 3) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 4) | + IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 5)); + + ret = sun50i_iommu_flush_all_tlb(iommu); + if (ret) { + spin_unlock_irqrestore(&iommu->iommu_lock, flags); + goto err_clk_disable; + } + + iommu_write(iommu, IOMMU_AUTO_GATING_REG, IOMMU_AUTO_GATING_ENABLE); + iommu_write(iommu, IOMMU_ENABLE_REG, IOMMU_ENABLE_ENABLE); + + spin_unlock_irqrestore(&iommu->iommu_lock, flags); + + return 0; + +err_clk_disable: + clk_disable_unprepare(iommu->clk); + +err_reset_assert: + reset_control_assert(iommu->reset); + + return ret; +} + +static void sun50i_iommu_disable(struct sun50i_iommu *iommu) +{ + unsigned long flags; + + spin_lock_irqsave(&iommu->iommu_lock, flags); + + iommu_write(iommu, IOMMU_ENABLE_REG, 0); + iommu_write(iommu, IOMMU_TTB_REG, 0); + + spin_unlock_irqrestore(&iommu->iommu_lock, flags); + + clk_disable_unprepare(iommu->clk); + reset_control_assert(iommu->reset); +} + +static void *sun50i_iommu_alloc_page_table(struct sun50i_iommu *iommu, + gfp_t gfp) +{ + dma_addr_t pt_dma; + u32 *page_table; + + page_table = kmem_cache_zalloc(iommu->pt_pool, gfp); + if (!page_table) + return ERR_PTR(-ENOMEM); + + pt_dma = dma_map_single(iommu->dev, page_table, PT_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(iommu->dev, pt_dma)) { + dev_err(iommu->dev, "Couldn't map L2 Page Table\n"); + kmem_cache_free(iommu->pt_pool, page_table); + return ERR_PTR(-ENOMEM); + } + + /* We rely on the physical address and DMA address being the same */ + WARN_ON(pt_dma != virt_to_phys(page_table)); + + return page_table; +} + +static void sun50i_iommu_free_page_table(struct sun50i_iommu *iommu, + u32 *page_table) +{ + phys_addr_t pt_phys = virt_to_phys(page_table); + + dma_unmap_single(iommu->dev, pt_phys, PT_SIZE, DMA_TO_DEVICE); + kmem_cache_free(iommu->pt_pool, page_table); +} + +static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain, + dma_addr_t iova, gfp_t gfp) +{ + struct sun50i_iommu *iommu = sun50i_domain->iommu; + u32 *page_table; + u32 *dte_addr; + u32 old_dte; + u32 dte; + + dte_addr = &sun50i_domain->dt[sun50i_iova_get_dte_index(iova)]; + dte = *dte_addr; + if (sun50i_dte_is_pt_valid(dte)) { + phys_addr_t pt_phys = sun50i_dte_get_pt_address(dte); + return (u32 *)phys_to_virt(pt_phys); + } + + page_table = sun50i_iommu_alloc_page_table(iommu, gfp); + if (IS_ERR(page_table)) + return page_table; + + dte = sun50i_mk_dte(virt_to_phys(page_table)); + old_dte = cmpxchg(dte_addr, 0, dte); + if (old_dte) { + phys_addr_t installed_pt_phys = + sun50i_dte_get_pt_address(old_dte); + u32 *installed_pt = phys_to_virt(installed_pt_phys); + u32 *drop_pt = page_table; + + page_table = installed_pt; + dte = old_dte; + sun50i_iommu_free_page_table(iommu, drop_pt); + } + + sun50i_table_flush(sun50i_domain, page_table, PT_SIZE); + sun50i_table_flush(sun50i_domain, dte_addr, 1); + + return page_table; +} + +static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu = sun50i_domain->iommu; + u32 pte_index; + u32 *page_table, *pte_addr; + int ret = 0; + + page_table = sun50i_dte_get_page_table(sun50i_domain, iova, gfp); + if (IS_ERR(page_table)) { + ret = PTR_ERR(page_table); + goto out; + } + + pte_index = sun50i_iova_get_pte_index(iova); + pte_addr = &page_table[pte_index]; + if (unlikely(sun50i_pte_is_page_valid(*pte_addr))) { + phys_addr_t page_phys = sun50i_pte_get_page_address(*pte_addr); + dev_err(iommu->dev, + "iova %pad already mapped to %pa cannot remap to %pa prot: %#x\n", + &iova, &page_phys, &paddr, prot); + ret = -EBUSY; + goto out; + } + + *pte_addr = sun50i_mk_pte(paddr, prot); + sun50i_table_flush(sun50i_domain, pte_addr, 1); + +out: + return ret; +} + +static size_t sun50i_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *gather) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + phys_addr_t pt_phys; + dma_addr_t pte_dma; + u32 *pte_addr; + u32 dte; + + dte = sun50i_domain->dt[sun50i_iova_get_dte_index(iova)]; + if (!sun50i_dte_is_pt_valid(dte)) + return 0; + + pt_phys = sun50i_dte_get_pt_address(dte); + pte_addr = (u32 *)phys_to_virt(pt_phys) + sun50i_iova_get_pte_index(iova); + pte_dma = pt_phys + sun50i_iova_get_pte_index(iova) * PT_ENTRY_SIZE; + + if (!sun50i_pte_is_page_valid(*pte_addr)) + return 0; + + memset(pte_addr, 0, sizeof(*pte_addr)); + sun50i_table_flush(sun50i_domain, pte_addr, 1); + + return SZ_4K; +} + +static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + phys_addr_t pt_phys; + u32 *page_table; + u32 dte, pte; + + dte = sun50i_domain->dt[sun50i_iova_get_dte_index(iova)]; + if (!sun50i_dte_is_pt_valid(dte)) + return 0; + + pt_phys = sun50i_dte_get_pt_address(dte); + page_table = (u32 *)phys_to_virt(pt_phys); + pte = page_table[sun50i_iova_get_pte_index(iova)]; + if (!sun50i_pte_is_page_valid(pte)) + return 0; + + return sun50i_pte_get_page_address(pte) + + sun50i_iova_get_page_offset(iova); +} + +static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type) +{ + struct sun50i_iommu_domain *sun50i_domain; + + if (type != IOMMU_DOMAIN_DMA && + type != IOMMU_DOMAIN_IDENTITY && + type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL); + if (!sun50i_domain) + return NULL; + + if (type == IOMMU_DOMAIN_DMA && + iommu_get_dma_cookie(&sun50i_domain->domain)) + goto err_free_domain; + + sun50i_domain->dt = (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(DT_SIZE)); + if (!sun50i_domain->dt) + goto err_put_cookie; + + refcount_set(&sun50i_domain->refcnt, 1); + + sun50i_domain->domain.geometry.aperture_start = 0; + sun50i_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32); + sun50i_domain->domain.geometry.force_aperture = true; + + return &sun50i_domain->domain; + +err_put_cookie: + if (type == IOMMU_DOMAIN_DMA) + iommu_put_dma_cookie(&sun50i_domain->domain); + +err_free_domain: + kfree(sun50i_domain); + + return NULL; +} + +static void sun50i_iommu_domain_free(struct iommu_domain *domain) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + + free_pages((unsigned long)sun50i_domain->dt, get_order(DT_SIZE)); + sun50i_domain->dt = NULL; + + iommu_put_dma_cookie(domain); + + kfree(sun50i_domain); +} + +static int sun50i_iommu_attach_domain(struct sun50i_iommu *iommu, + struct sun50i_iommu_domain *sun50i_domain) +{ + iommu->domain = &sun50i_domain->domain; + sun50i_domain->iommu = iommu; + + sun50i_domain->dt_dma = dma_map_single(iommu->dev, sun50i_domain->dt, + DT_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(iommu->dev, sun50i_domain->dt_dma)) { + dev_err(iommu->dev, "Couldn't map L1 Page Table\n"); + return -ENOMEM; + } + + return sun50i_iommu_enable(iommu); +} + +static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu, + struct sun50i_iommu_domain *sun50i_domain) +{ + unsigned int i; + + for (i = 0; i < NUM_DT_ENTRIES; i++) { + phys_addr_t pt_phys; + u32 *page_table; + u32 *dte_addr; + u32 dte; + + dte_addr = &sun50i_domain->dt[i]; + dte = *dte_addr; + if (!sun50i_dte_is_pt_valid(dte)) + continue; + + memset(dte_addr, 0, sizeof(*dte_addr)); + sun50i_table_flush(sun50i_domain, dte_addr, 1); + + pt_phys = sun50i_dte_get_pt_address(dte); + page_table = phys_to_virt(pt_phys); + sun50i_iommu_free_page_table(iommu, page_table); + } + + + sun50i_iommu_disable(iommu); + + dma_unmap_single(iommu->dev, virt_to_phys(sun50i_domain->dt), + DT_SIZE, DMA_TO_DEVICE); + + iommu->domain = NULL; +} + +static void sun50i_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu = dev_iommu_priv_get(dev); + + dev_dbg(dev, "Detaching from IOMMU domain\n"); + + if (iommu->domain != domain) + return; + + if (refcount_dec_and_test(&sun50i_domain->refcnt)) + sun50i_iommu_detach_domain(iommu, sun50i_domain); +} + +static int sun50i_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); + struct sun50i_iommu *iommu; + + iommu = sun50i_iommu_from_dev(dev); + if (!iommu) + return -ENODEV; + + dev_dbg(dev, "Attaching to IOMMU domain\n"); + + refcount_inc(&sun50i_domain->refcnt); + + if (iommu->domain == domain) + return 0; + + if (iommu->domain) + sun50i_iommu_detach_device(iommu->domain, dev); + + sun50i_iommu_attach_domain(iommu, sun50i_domain); + + return 0; +} + +static struct iommu_device *sun50i_iommu_probe_device(struct device *dev) +{ + struct sun50i_iommu *iommu; + + iommu = sun50i_iommu_from_dev(dev); + if (!iommu) + return ERR_PTR(-ENODEV); + + return &iommu->iommu; +} + +static void sun50i_iommu_release_device(struct device *dev) {} + +static struct iommu_group *sun50i_iommu_device_group(struct device *dev) +{ + struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev); + + return iommu_group_ref_get(iommu->group); +} + +static int sun50i_iommu_of_xlate(struct device *dev, + struct of_phandle_args *args) +{ + struct platform_device *iommu_pdev = of_find_device_by_node(args->np); + unsigned id = args->args[0]; + + dev_iommu_priv_set(dev, platform_get_drvdata(iommu_pdev)); + + return iommu_fwspec_add_ids(dev, &id, 1); +} + +static const struct iommu_ops sun50i_iommu_ops = { + .pgsize_bitmap = SZ_4K, + .attach_dev = sun50i_iommu_attach_device, + .detach_dev = sun50i_iommu_detach_device, + .device_group = sun50i_iommu_device_group, + .domain_alloc = sun50i_iommu_domain_alloc, + .domain_free = sun50i_iommu_domain_free, + .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, + .iotlb_sync = sun50i_iommu_iotlb_sync, + .iova_to_phys = sun50i_iommu_iova_to_phys, + .map = sun50i_iommu_map, + .of_xlate = sun50i_iommu_of_xlate, + .probe_device = sun50i_iommu_probe_device, + .release_device = sun50i_iommu_release_device, + .unmap = sun50i_iommu_unmap, +}; + +static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu, + unsigned master, phys_addr_t iova, + unsigned prot) +{ + dev_err(iommu->dev, "Page fault for %pad (master %d, dir %s)\n", + &iova, master, (prot == IOMMU_FAULT_WRITE) ? "wr" : "rd"); + + if (iommu->domain) + report_iommu_fault(iommu->domain, iommu->dev, iova, prot); + else + dev_err(iommu->dev, "Page fault while iommu not attached to any domain?\n"); +} + +static phys_addr_t sun50i_iommu_handle_pt_irq(struct sun50i_iommu *iommu, + unsigned addr_reg, + unsigned blame_reg) +{ + phys_addr_t iova; + unsigned master; + u32 blame; + + assert_spin_locked(&iommu->iommu_lock); + + iova = iommu_read(iommu, addr_reg); + blame = iommu_read(iommu, blame_reg); + master = ilog2(blame & IOMMU_INT_MASTER_MASK); + + /* + * If the address is not in the page table, we can't get what + * operation triggered the fault. Assume it's a read + * operation. + */ + sun50i_iommu_report_fault(iommu, master, iova, IOMMU_FAULT_READ); + + return iova; +} + +static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu) +{ + enum sun50i_iommu_aci aci; + phys_addr_t iova; + unsigned master; + unsigned dir; + u32 blame; + + assert_spin_locked(&iommu->iommu_lock); + + blame = iommu_read(iommu, IOMMU_INT_STA_REG); + master = ilog2(blame & IOMMU_INT_MASTER_MASK); + iova = iommu_read(iommu, IOMMU_INT_ERR_ADDR_REG(master)); + aci = sun50i_get_pte_aci(iommu_read(iommu, + IOMMU_INT_ERR_DATA_REG(master))); + + switch (aci) { + /* + * If we are in the read-only domain, then it means we + * tried to write. + */ + case SUN50I_IOMMU_ACI_RD: + dir = IOMMU_FAULT_WRITE; + break; + + /* + * If we are in the write-only domain, then it means + * we tried to read. + */ + case SUN50I_IOMMU_ACI_WR: + + /* + * If we are in the domain without any permission, we + * can't really tell. Let's default to a read + * operation. + */ + case SUN50I_IOMMU_ACI_NONE: + + /* WTF? */ + case SUN50I_IOMMU_ACI_RD_WR: + default: + dir = IOMMU_FAULT_READ; + break; + } + + /* + * If the address is not in the page table, we can't get what + * operation triggered the fault. Assume it's a read + * operation. + */ + sun50i_iommu_report_fault(iommu, master, iova, dir); + + return iova; +} + +static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) +{ + struct sun50i_iommu *iommu = dev_id; + phys_addr_t iova; + u32 status; + + spin_lock(&iommu->iommu_lock); + + status = iommu_read(iommu, IOMMU_INT_STA_REG); + if (!(status & IOMMU_INT_MASK)) { + spin_unlock(&iommu->iommu_lock); + return IRQ_NONE; + } + + if (status & IOMMU_INT_INVALID_L2PG) + iova = sun50i_iommu_handle_pt_irq(iommu, + IOMMU_INT_ERR_ADDR_L2_REG, + IOMMU_L2PG_INT_REG); + else if (status & IOMMU_INT_INVALID_L1PG) + iova = sun50i_iommu_handle_pt_irq(iommu, + IOMMU_INT_ERR_ADDR_L1_REG, + IOMMU_L1PG_INT_REG); + else + iova = sun50i_iommu_handle_perm_irq(iommu); + + iommu_write(iommu, IOMMU_INT_CLR_REG, status); + + iommu_write(iommu, IOMMU_RESET_REG, ~status); + iommu_write(iommu, IOMMU_RESET_REG, status); + + spin_unlock(&iommu->iommu_lock); + + return IRQ_HANDLED; +} + +static int sun50i_iommu_probe(struct platform_device *pdev) +{ + struct sun50i_iommu *iommu; + int ret, irq; + + iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return -ENOMEM; + spin_lock_init(&iommu->iommu_lock); + platform_set_drvdata(pdev, iommu); + iommu->dev = &pdev->dev; + + iommu->pt_pool = kmem_cache_create(dev_name(&pdev->dev), + PT_SIZE, PT_SIZE, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu->pt_pool) + return -ENOMEM; + + iommu->group = iommu_group_alloc(); + if (IS_ERR(iommu->group)) { + ret = PTR_ERR(iommu->group); + goto err_free_cache; + } + + iommu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(iommu->base)) { + ret = PTR_ERR(iommu->base); + goto err_free_group; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto err_free_group; + } + + iommu->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(iommu->clk)) { + dev_err(&pdev->dev, "Couldn't get our clock.\n"); + ret = PTR_ERR(iommu->clk); + goto err_free_group; + } + + iommu->reset = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(iommu->reset)) { + dev_err(&pdev->dev, "Couldn't get our reset line.\n"); + ret = PTR_ERR(iommu->reset); + goto err_free_group; + } + + ret = iommu_device_sysfs_add(&iommu->iommu, &pdev->dev, + NULL, dev_name(&pdev->dev)); + if (ret) + goto err_free_group; + + iommu_device_set_ops(&iommu->iommu, &sun50i_iommu_ops); + iommu_device_set_fwnode(&iommu->iommu, &pdev->dev.of_node->fwnode); + + ret = iommu_device_register(&iommu->iommu); + if (ret) + goto err_remove_sysfs; + + ret = devm_request_irq(&pdev->dev, irq, sun50i_iommu_irq, 0, + dev_name(&pdev->dev), iommu); + if (ret < 0) + goto err_unregister; + + bus_set_iommu(&platform_bus_type, &sun50i_iommu_ops); + + return 0; + +err_unregister: + iommu_device_unregister(&iommu->iommu); + +err_remove_sysfs: + iommu_device_sysfs_remove(&iommu->iommu); + +err_free_group: + iommu_group_put(iommu->group); + +err_free_cache: + kmem_cache_destroy(iommu->pt_pool); + + return ret; +} + +static const struct of_device_id sun50i_iommu_dt[] = { + { .compatible = "allwinner,sun50i-h6-iommu", }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, sun50i_iommu_dt); + +static struct platform_driver sun50i_iommu_driver = { + .driver = { + .name = "sun50i-iommu", + .of_match_table = sun50i_iommu_dt, + .suppress_bind_attrs = true, + } +}; +builtin_platform_driver_probe(sun50i_iommu_driver, sun50i_iommu_probe); + +MODULE_DESCRIPTION("Allwinner H6 IOMMU driver"); +MODULE_AUTHOR("Maxime Ripard <maxime@cerno.tech>"); +MODULE_AUTHOR("zhuxianbin <zhuxianbin@allwinnertech.com>"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index db6559e8336f..5fbdff6ff41a 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -243,28 +243,16 @@ static bool gart_iommu_capable(enum iommu_cap cap) return false; } -static int gart_iommu_add_device(struct device *dev) +static struct iommu_device *gart_iommu_probe_device(struct device *dev) { - struct iommu_group *group; - if (!dev_iommu_fwspec_get(dev)) - return -ENODEV; - - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); - - iommu_group_put(group); + return ERR_PTR(-ENODEV); - iommu_device_link(&gart_handle->iommu, dev); - - return 0; + return &gart_handle->iommu; } -static void gart_iommu_remove_device(struct device *dev) +static void gart_iommu_release_device(struct device *dev) { - iommu_group_remove_device(dev); - iommu_device_unlink(&gart_handle->iommu, dev); } static int gart_iommu_of_xlate(struct device *dev, @@ -290,8 +278,8 @@ static const struct iommu_ops gart_iommu_ops = { .domain_free = gart_iommu_domain_free, .attach_dev = gart_iommu_attach_dev, .detach_dev = gart_iommu_detach_dev, - .add_device = gart_iommu_add_device, - .remove_device = gart_iommu_remove_device, + .probe_device = gart_iommu_probe_device, + .release_device = gart_iommu_release_device, .device_group = generic_device_group, .map = gart_iommu_map, .unmap = gart_iommu_unmap, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 63a147b623e6..7426b7666e2b 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -757,11 +757,10 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, return 0; } -static int tegra_smmu_add_device(struct device *dev) +static struct iommu_device *tegra_smmu_probe_device(struct device *dev) { struct device_node *np = dev->of_node; struct tegra_smmu *smmu = NULL; - struct iommu_group *group; struct of_phandle_args args; unsigned int index = 0; int err; @@ -774,7 +773,7 @@ static int tegra_smmu_add_device(struct device *dev) of_node_put(args.np); if (err < 0) - return err; + return ERR_PTR(err); /* * Only a single IOMMU master interface is currently @@ -783,8 +782,6 @@ static int tegra_smmu_add_device(struct device *dev) */ dev->archdata.iommu = smmu; - iommu_device_link(&smmu->iommu, dev); - break; } @@ -793,26 +790,14 @@ static int tegra_smmu_add_device(struct device *dev) } if (!smmu) - return -ENODEV; - - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); - - iommu_group_put(group); + return ERR_PTR(-ENODEV); - return 0; + return &smmu->iommu; } -static void tegra_smmu_remove_device(struct device *dev) +static void tegra_smmu_release_device(struct device *dev) { - struct tegra_smmu *smmu = dev->archdata.iommu; - - if (smmu) - iommu_device_unlink(&smmu->iommu, dev); - dev->archdata.iommu = NULL; - iommu_group_remove_device(dev); } static const struct tegra_smmu_group_soc * @@ -895,8 +880,8 @@ static const struct iommu_ops tegra_smmu_ops = { .domain_free = tegra_smmu_domain_free, .attach_dev = tegra_smmu_attach_dev, .detach_dev = tegra_smmu_detach_dev, - .add_device = tegra_smmu_add_device, - .remove_device = tegra_smmu_remove_device, + .probe_device = tegra_smmu_probe_device, + .release_device = tegra_smmu_release_device, .device_group = tegra_smmu_device_group, .map = tegra_smmu_map, .unmap = tegra_smmu_unmap, @@ -1015,7 +1000,7 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, * value. However the IOMMU registration process will attempt to add * all devices to the IOMMU when bus_set_iommu() is called. In order * not to rely on global variables to track the IOMMU instance, we - * set it here so that it can be looked up from the .add_device() + * set it here so that it can be looked up from the .probe_device() * callback via the IOMMU device's .drvdata field. */ mc->smmu = smmu; diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 4e1d11af23c8..f6f07489a9aa 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -865,24 +865,23 @@ static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode) return dev ? dev_to_virtio(dev)->priv : NULL; } -static int viommu_add_device(struct device *dev) +static struct iommu_device *viommu_probe_device(struct device *dev) { int ret; - struct iommu_group *group; struct viommu_endpoint *vdev; struct viommu_dev *viommu = NULL; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); if (!fwspec || fwspec->ops != &viommu_ops) - return -ENODEV; + return ERR_PTR(-ENODEV); viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode); if (!viommu) - return -ENODEV; + return ERR_PTR(-ENODEV); vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) - return -ENOMEM; + return ERR_PTR(-ENOMEM); vdev->dev = dev; vdev->viommu = viommu; @@ -896,45 +895,25 @@ static int viommu_add_device(struct device *dev) goto err_free_dev; } - ret = iommu_device_link(&viommu->iommu, dev); - if (ret) - goto err_free_dev; + return &viommu->iommu; - /* - * Last step creates a default domain and attaches to it. Everything - * must be ready. - */ - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto err_unlink_dev; - } - - iommu_group_put(group); - - return PTR_ERR_OR_ZERO(group); - -err_unlink_dev: - iommu_device_unlink(&viommu->iommu, dev); err_free_dev: generic_iommu_put_resv_regions(dev, &vdev->resv_regions); kfree(vdev); - return ret; + return ERR_PTR(ret); } -static void viommu_remove_device(struct device *dev) +static void viommu_release_device(struct device *dev) { - struct viommu_endpoint *vdev; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct viommu_endpoint *vdev; if (!fwspec || fwspec->ops != &viommu_ops) return; vdev = dev_iommu_priv_get(dev); - iommu_group_remove_device(dev); - iommu_device_unlink(&vdev->viommu->iommu, dev); generic_iommu_put_resv_regions(dev, &vdev->resv_regions); kfree(vdev); } @@ -960,8 +939,8 @@ static struct iommu_ops viommu_ops = { .unmap = viommu_unmap, .iova_to_phys = viommu_iova_to_phys, .iotlb_sync = viommu_iotlb_sync, - .add_device = viommu_add_device, - .remove_device = viommu_remove_device, + .probe_device = viommu_probe_device, + .release_device = viommu_release_device, .device_group = viommu_device_group, .get_resv_regions = viommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index d39307f060bd..107028e77ca3 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -90,109 +90,39 @@ static long uacce_fops_compat_ioctl(struct file *filep, } #endif -static int uacce_sva_exit(struct device *dev, struct iommu_sva *handle, - void *data) +static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q) { - struct uacce_mm *uacce_mm = data; - struct uacce_queue *q; - - /* - * No new queue can be added concurrently because no caller can have a - * reference to this mm. But there may be concurrent calls to - * uacce_mm_put(), so we need the lock. - */ - mutex_lock(&uacce_mm->lock); - list_for_each_entry(q, &uacce_mm->queues, list) - uacce_put_queue(q); - uacce_mm->mm = NULL; - mutex_unlock(&uacce_mm->lock); + int pasid; + struct iommu_sva *handle; - return 0; -} - -static struct iommu_sva_ops uacce_sva_ops = { - .mm_exit = uacce_sva_exit, -}; - -static struct uacce_mm *uacce_mm_get(struct uacce_device *uacce, - struct uacce_queue *q, - struct mm_struct *mm) -{ - struct uacce_mm *uacce_mm = NULL; - struct iommu_sva *handle = NULL; - int ret; - - lockdep_assert_held(&uacce->mm_lock); - - list_for_each_entry(uacce_mm, &uacce->mm_list, list) { - if (uacce_mm->mm == mm) { - mutex_lock(&uacce_mm->lock); - list_add(&q->list, &uacce_mm->queues); - mutex_unlock(&uacce_mm->lock); - return uacce_mm; - } - } - - uacce_mm = kzalloc(sizeof(*uacce_mm), GFP_KERNEL); - if (!uacce_mm) - return NULL; + if (!(uacce->flags & UACCE_DEV_SVA)) + return 0; - if (uacce->flags & UACCE_DEV_SVA) { - /* - * Safe to pass an incomplete uacce_mm, since mm_exit cannot - * fire while we hold a reference to the mm. - */ - handle = iommu_sva_bind_device(uacce->parent, mm, uacce_mm); - if (IS_ERR(handle)) - goto err_free; + handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL); + if (IS_ERR(handle)) + return PTR_ERR(handle); - ret = iommu_sva_set_ops(handle, &uacce_sva_ops); - if (ret) - goto err_unbind; - - uacce_mm->pasid = iommu_sva_get_pasid(handle); - if (uacce_mm->pasid == IOMMU_PASID_INVALID) - goto err_unbind; + pasid = iommu_sva_get_pasid(handle); + if (pasid == IOMMU_PASID_INVALID) { + iommu_sva_unbind_device(handle); + return -ENODEV; } - uacce_mm->mm = mm; - uacce_mm->handle = handle; - INIT_LIST_HEAD(&uacce_mm->queues); - mutex_init(&uacce_mm->lock); - list_add(&q->list, &uacce_mm->queues); - list_add(&uacce_mm->list, &uacce->mm_list); - - return uacce_mm; - -err_unbind: - if (handle) - iommu_sva_unbind_device(handle); -err_free: - kfree(uacce_mm); - return NULL; + q->handle = handle; + q->pasid = pasid; + return 0; } -static void uacce_mm_put(struct uacce_queue *q) +static void uacce_unbind_queue(struct uacce_queue *q) { - struct uacce_mm *uacce_mm = q->uacce_mm; - - lockdep_assert_held(&q->uacce->mm_lock); - - mutex_lock(&uacce_mm->lock); - list_del(&q->list); - mutex_unlock(&uacce_mm->lock); - - if (list_empty(&uacce_mm->queues)) { - if (uacce_mm->handle) - iommu_sva_unbind_device(uacce_mm->handle); - list_del(&uacce_mm->list); - kfree(uacce_mm); - } + if (!q->handle) + return; + iommu_sva_unbind_device(q->handle); + q->handle = NULL; } static int uacce_fops_open(struct inode *inode, struct file *filep) { - struct uacce_mm *uacce_mm = NULL; struct uacce_device *uacce; struct uacce_queue *q; int ret = 0; @@ -205,21 +135,16 @@ static int uacce_fops_open(struct inode *inode, struct file *filep) if (!q) return -ENOMEM; - mutex_lock(&uacce->mm_lock); - uacce_mm = uacce_mm_get(uacce, q, current->mm); - mutex_unlock(&uacce->mm_lock); - if (!uacce_mm) { - ret = -ENOMEM; + ret = uacce_bind_queue(uacce, q); + if (ret) goto out_with_mem; - } q->uacce = uacce; - q->uacce_mm = uacce_mm; if (uacce->ops->get_queue) { - ret = uacce->ops->get_queue(uacce, uacce_mm->pasid, q); + ret = uacce->ops->get_queue(uacce, q->pasid, q); if (ret < 0) - goto out_with_mm; + goto out_with_bond; } init_waitqueue_head(&q->wait); @@ -227,12 +152,14 @@ static int uacce_fops_open(struct inode *inode, struct file *filep) uacce->inode = inode; q->state = UACCE_Q_INIT; + mutex_lock(&uacce->queues_lock); + list_add(&q->list, &uacce->queues); + mutex_unlock(&uacce->queues_lock); + return 0; -out_with_mm: - mutex_lock(&uacce->mm_lock); - uacce_mm_put(q); - mutex_unlock(&uacce->mm_lock); +out_with_bond: + uacce_unbind_queue(q); out_with_mem: kfree(q); return ret; @@ -241,14 +168,12 @@ out_with_mem: static int uacce_fops_release(struct inode *inode, struct file *filep) { struct uacce_queue *q = filep->private_data; - struct uacce_device *uacce = q->uacce; + mutex_lock(&q->uacce->queues_lock); + list_del(&q->list); + mutex_unlock(&q->uacce->queues_lock); uacce_put_queue(q); - - mutex_lock(&uacce->mm_lock); - uacce_mm_put(q); - mutex_unlock(&uacce->mm_lock); - + uacce_unbind_queue(q); kfree(q); return 0; @@ -513,8 +438,8 @@ struct uacce_device *uacce_alloc(struct device *parent, if (ret < 0) goto err_with_uacce; - INIT_LIST_HEAD(&uacce->mm_list); - mutex_init(&uacce->mm_lock); + INIT_LIST_HEAD(&uacce->queues); + mutex_init(&uacce->queues_lock); device_initialize(&uacce->dev); uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id); uacce->dev.class = uacce_class; @@ -561,8 +486,7 @@ EXPORT_SYMBOL_GPL(uacce_register); */ void uacce_remove(struct uacce_device *uacce) { - struct uacce_mm *uacce_mm; - struct uacce_queue *q; + struct uacce_queue *q, *next_q; if (!uacce) return; @@ -574,24 +498,12 @@ void uacce_remove(struct uacce_device *uacce) unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1); /* ensure no open queue remains */ - mutex_lock(&uacce->mm_lock); - list_for_each_entry(uacce_mm, &uacce->mm_list, list) { - /* - * We don't take the uacce_mm->lock here. Since we hold the - * device's mm_lock, no queue can be added to or removed from - * this uacce_mm. We may run concurrently with mm_exit, but - * uacce_put_queue() is serialized and iommu_sva_unbind_device() - * waits for the lock that mm_exit is holding. - */ - list_for_each_entry(q, &uacce_mm->queues, list) - uacce_put_queue(q); - - if (uacce->flags & UACCE_DEV_SVA) { - iommu_sva_unbind_device(uacce_mm->handle); - uacce_mm->handle = NULL; - } + mutex_lock(&uacce->queues_lock); + list_for_each_entry_safe(q, next_q, &uacce->queues, list) { + uacce_put_queue(q); + uacce_unbind_queue(q); } - mutex_unlock(&uacce->mm_lock); + mutex_unlock(&uacce->queues_lock); /* disable sva now since no opened queues */ if (uacce->flags & UACCE_DEV_SVA) diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 390e92f2d8d1..b761c1f72f67 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -31,6 +31,22 @@ void pci_ats_init(struct pci_dev *dev) } /** + * pci_ats_supported - check if the device can use ATS + * @dev: the PCI device + * + * Returns true if the device supports ATS and is allowed to use it, false + * otherwise. + */ +bool pci_ats_supported(struct pci_dev *dev) +{ + if (!dev->ats_cap) + return false; + + return (dev->untrusted == 0); +} +EXPORT_SYMBOL_GPL(pci_ats_supported); + +/** * pci_enable_ats - enable the ATS capability * @dev: the PCI device * @ps: the IOMMU page shift @@ -42,7 +58,7 @@ int pci_enable_ats(struct pci_dev *dev, int ps) u16 ctrl; struct pci_dev *pdev; - if (!dev->ats_cap) + if (!pci_ats_supported(dev)) return -EINVAL; if (WARN_ON(dev->ats_enabled)) |