summary | refs | log | tree | commit | diff | stats
path: root/drivers/iommu/intel
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r-- drivers/iommu/intel/Makefile 2
-rw-r--r-- drivers/iommu/intel/cache.c 419
-rw-r--r-- drivers/iommu/intel/debugfs.c 7
-rw-r--r-- drivers/iommu/intel/dmar.c 26
-rw-r--r-- drivers/iommu/intel/iommu.c 383
-rw-r--r-- drivers/iommu/intel/iommu.h 88
-rw-r--r-- drivers/iommu/intel/irq_remapping.c 148
-rw-r--r-- drivers/iommu/intel/nested.c 69
-rw-r--r-- drivers/iommu/intel/pasid.c 18
-rw-r--r-- drivers/iommu/intel/perf.h 1
-rw-r--r-- drivers/iommu/intel/svm.c 383
-rw-r--r-- drivers/iommu/intel/trace.h 103
12 files changed, 890 insertions, 757 deletions
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index 5402b699a122..c8beb0281559 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
-obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o
+obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
new file mode 100644
index 000000000000..e8418cdd8331
--- /dev/null
+++ b/drivers/iommu/intel/cache.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * cache.c - Intel VT-d cache invalidation
+ *
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+#define pr_fmt(fmt) "DMAR: " fmt
+
+#include <linux/dmar.h>
+#include <linux/iommu.h>
+#include <linux/memory.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+
+#include "iommu.h"
+#include "pasid.h"
+#include "trace.h"
+
+/*
+ * Tell whether @tag already describes the (type, domain ID, PASID, target)
+ * combination of a new association, so that it can be shared rather than
+ * duplicated. IOTLB-type tags are compared by IOMMU, device-TLB-type tags
+ * are compared by device.
+ */
+static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
+			     struct intel_iommu *iommu, struct device *dev,
+			     ioasid_t pasid, enum cache_tag_type type)
+{
+	if (tag->type != type || tag->domain_id != domain_id ||
+	    tag->pasid != pasid)
+		return false;
+
+	switch (type) {
+	case CACHE_TAG_IOTLB:
+	case CACHE_TAG_NESTING_IOTLB:
+		return tag->iommu == iommu;
+	case CACHE_TAG_DEVTLB:
+	case CACHE_TAG_NESTING_DEVTLB:
+		return tag->dev == dev;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Attach a cache tag of @type to @domain for @dev/@pasid using domain ID
+ * @did. If an equivalent tag is already on the domain's list its user count
+ * is bumped; otherwise a new tag is appended.
+ *
+ * Returns 0 on success or -ENOMEM if the tag cannot be allocated.
+ */
+static int cache_tag_assign(struct dmar_domain *domain, u16 did,
+			    struct device *dev, ioasid_t pasid,
+			    enum cache_tag_type type)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct intel_iommu *iommu = info->iommu;
+	bool is_devtlb = type == CACHE_TAG_DEVTLB ||
+			 type == CACHE_TAG_NESTING_DEVTLB;
+	struct cache_tag *new_tag, *cur;
+	unsigned long flags;
+
+	/* The allocation is done up front, before the spinlock is taken. */
+	new_tag = kzalloc(sizeof(*new_tag), GFP_KERNEL);
+	if (!new_tag)
+		return -ENOMEM;
+
+	new_tag->type = type;
+	new_tag->iommu = iommu;
+	new_tag->domain_id = did;
+	new_tag->pasid = pasid;
+	new_tag->users = 1;
+	/* Device-TLB tags point at the endpoint, IOTLB tags at the IOMMU. */
+	new_tag->dev = is_devtlb ? dev : iommu->iommu.dev;
+
+	spin_lock_irqsave(&domain->cache_lock, flags);
+	list_for_each_entry(cur, &domain->cache_tags, node) {
+		if (!cache_tage_match(cur, did, iommu, dev, pasid, type))
+			continue;
+
+		/* An equivalent tag exists: share it and drop the new one. */
+		cur->users++;
+		spin_unlock_irqrestore(&domain->cache_lock, flags);
+		kfree(new_tag);
+		trace_cache_tag_assign(cur);
+		return 0;
+	}
+	list_add_tail(&new_tag->node, &domain->cache_tags);
+	spin_unlock_irqrestore(&domain->cache_lock, flags);
+	trace_cache_tag_assign(new_tag);
+
+	return 0;
+}
+
+/*
+ * Drop one user of the cache tag of @type that matches @did/@dev/@pasid on
+ * @domain. The tag is unlinked and freed when its last user goes away. If no
+ * matching tag is found nothing happens.
+ */
+static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
+			       struct device *dev, ioasid_t pasid,
+			       enum cache_tag_type type)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct intel_iommu *iommu = info->iommu;
+	struct cache_tag *cur;
+	unsigned long flags;
+
+	spin_lock_irqsave(&domain->cache_lock, flags);
+	list_for_each_entry(cur, &domain->cache_tags, node) {
+		if (!cache_tage_match(cur, did, iommu, dev, pasid, type))
+			continue;
+
+		trace_cache_tag_unassign(cur);
+		cur->users--;
+		if (!cur->users) {
+			list_del(&cur->node);
+			kfree(cur);
+		}
+		/* At most one tag can match; stop after handling it. */
+		break;
+	}
+	spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
+
+/*
+ * Assign the IOTLB tag and, when ATS is enabled on @dev, the device-TLB tag
+ * to @domain. The IOTLB tag is rolled back if the device-TLB tag cannot be
+ * assigned. Returns 0 on success or a negative error code.
+ */
+static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
+				     struct device *dev, ioasid_t pasid)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	int ret;
+
+	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
+	if (ret)
+		return ret;
+
+	if (!info->ats_enabled)
+		return 0;
+
+	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
+	if (!ret)
+		return 0;
+
+	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
+	return ret;
+}
+
+/*
+ * Release the IOTLB tag and, when ATS is enabled on @dev, the device-TLB tag
+ * that __cache_tag_assign_domain() attached to @domain.
+ */
+static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
+					struct device *dev, ioasid_t pasid)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
+	if (!info->ats_enabled)
+		return;
+
+	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
+}
+
+/*
+ * Assign the nesting IOTLB tag and, when ATS is enabled on @dev, the nesting
+ * device-TLB tag to @domain. The nesting IOTLB tag is rolled back if the
+ * second assignment fails. Returns 0 on success or a negative error code.
+ */
+static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
+					    struct device *dev, ioasid_t pasid)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	int ret;
+
+	ret = cache_tag_assign(domain, did, dev, pasid,
+			       CACHE_TAG_NESTING_IOTLB);
+	if (ret)
+		return ret;
+
+	if (!info->ats_enabled)
+		return 0;
+
+	ret = cache_tag_assign(domain, did, dev, pasid,
+			       CACHE_TAG_NESTING_DEVTLB);
+	if (!ret)
+		return 0;
+
+	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
+	return ret;
+}
+
+/*
+ * Release the nesting IOTLB tag and, when ATS is enabled on @dev, the nesting
+ * device-TLB tag that __cache_tag_assign_parent_domain() attached to @domain.
+ */
+static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
+					       struct device *dev, ioasid_t pasid)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
+	if (!info->ats_enabled)
+		return;
+
+	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
+}
+
+/* Return the domain ID that @domain uses on the IOMMU serving @dev. */
+static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct intel_iommu *iommu = info->iommu;
+
+	/*
+	 * Every domain type gets its own per-IOMMU domain ID from the driver,
+	 * with the exception of SVA domains, which use FLPT_DEFAULT_DID.
+	 */
+	if (domain->domain.type != IOMMU_DOMAIN_SVA)
+		return domain_id_iommu(domain, iommu);
+
+	return FLPT_DEFAULT_DID;
+}
+
+/*
+ * Create the cache tags needed when @domain is associated with @pasid of
+ * @dev, using the domain ID the domain has on the device's IOMMU.
+ *
+ * For a nested domain, nesting tags carrying the same domain ID are also
+ * assigned to its second-stage parent (domain->s2_domain); the domain's own
+ * tags are rolled back if that fails.
+ *
+ * Returns 0 when the tags have been added to the cache tag list(s), or a
+ * negative error code describing why the assignment failed.
+ */
+int cache_tag_assign_domain(struct dmar_domain *domain,
+			    struct device *dev, ioasid_t pasid)
+{
+	u16 did = domain_get_id_for_dev(domain, dev);
+	int ret;
+
+	ret = __cache_tag_assign_domain(domain, did, dev, pasid);
+	if (ret)
+		return ret;
+
+	if (domain->domain.type != IOMMU_DOMAIN_NESTED)
+		return 0;
+
+	ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev,
+					       pasid);
+	if (!ret)
+		return 0;
+
+	__cache_tag_unassign_domain(domain, did, dev, pasid);
+	return ret;
+}
+
+/*
+ * Drop the cache tags tied to @pasid of @dev when @domain is detached from
+ * the device. For a nested domain the nesting tags on its second-stage
+ * parent are dropped as well.
+ *
+ * The tags must have been assigned earlier through the assign interface.
+ */
+void cache_tag_unassign_domain(struct dmar_domain *domain,
+			       struct device *dev, ioasid_t pasid)
+{
+	u16 did = domain_get_id_for_dev(domain, dev);
+
+	__cache_tag_unassign_domain(domain, did, dev, pasid);
+	if (domain->domain.type != IOMMU_DOMAIN_NESTED)
+		return;
+
+	__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
+}
+
+/*
+ * Convert the inclusive byte range [@start, @end] into a size-aligned,
+ * power-of-two sized range that a page-selective invalidation can express.
+ *
+ * @start:  first byte of the range to invalidate
+ * @end:    last byte of the range to invalidate (inclusive)
+ * @_pages: out: number of pages covered by the aligned range, or -1 when the
+ *          request spans the whole address space
+ * @_mask:  out: address mask order, i.e. log2 of the aligned page count
+ *
+ * Returns the base address of the aligned range.
+ */
+static unsigned long calculate_psi_aligned_address(unsigned long start,
+						   unsigned long end,
+						   unsigned long *_pages,
+						   unsigned long *_mask)
+{
+	unsigned long pages, aligned_pages, bitmask, mask, pfn;
+
+	/*
+	 * A range covering the entire address space makes "end - start + 1"
+	 * wrap to zero, and rounding zero pages up to a power of two has no
+	 * defined result. Report it as a flush-everything request, using the
+	 * same pages/mask values this file uses for full flushes.
+	 */
+	if (unlikely(start == 0 && end == ~0UL)) {
+		*_pages = -1;
+		*_mask = MAX_AGAW_PFN_WIDTH;
+		return 0;
+	}
+
+	pages = aligned_nrpages(start, end - start + 1);
+	aligned_pages = __roundup_pow_of_two(pages);
+	bitmask = aligned_pages - 1;
+	mask = ilog2(aligned_pages);
+	pfn = IOVA_PFN(start);
+
+	/*
+	 * PSI masks the low order bits of the base address. If the
+	 * address isn't aligned to the mask, then compute a mask value
+	 * needed to ensure the target range is flushed.
+	 */
+	if (unlikely(bitmask & pfn)) {
+		unsigned long end_pfn = pfn + pages - 1, shared_bits;
+
+		/*
+		 * Since end_pfn <= pfn + bitmask, the only way bits
+		 * higher than bitmask can differ in pfn and end_pfn is
+		 * by carrying. This means after masking out bitmask,
+		 * high bits starting with the first set bit in
+		 * shared_bits are all equal in both pfn and end_pfn.
+		 */
+		shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
+		/*
+		 * Cap the fallback at the widest PFN mask rather than
+		 * BITS_PER_LONG, which would make the shift below exceed
+		 * the width of unsigned long.
+		 */
+		mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
+		/*
+		 * The mask grew to cover the unaligned start, so the page
+		 * count reported to the caller has to grow with it;
+		 * otherwise flushes sized from *_pages stop short of @end.
+		 */
+		aligned_pages = 1UL << mask;
+	}
+
+	*_pages = aligned_pages;
+	*_mask = mask;
+
+	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
+}
+
+/*
+ * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
+ * when the memory mappings in the target domain have been modified.
+ *
+ * @domain: domain whose cache tags are walked
+ * @start:  first byte of the modified range
+ * @end:    last byte of the modified range (inclusive)
+ * @ih:     invalidation hint; any non-zero value requests the hint
+ *
+ * Every tag on the domain's list is flushed under domain->cache_lock: IOTLB
+ * tags with a PASID-based, page-selective or domain-selective invalidation,
+ * device-TLB tags with a device-TLB invalidation.
+ */
+void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
+			   unsigned long end, int ih)
+{
+	unsigned long pages, mask, addr;
+	/*
+	 * flush_iotlb() takes the hint OR-ed into the address. The helper this
+	 * code replaces encoded it as bit 6 before doing so; do the same here
+	 * so that a caller passing a plain boolean does not set bit 0 instead.
+	 */
+	unsigned long ih_bit = ih ? 1UL << 6 : 0;
+	struct cache_tag *tag;
+	unsigned long flags;
+
+	addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+
+	spin_lock_irqsave(&domain->cache_lock, flags);
+	list_for_each_entry(tag, &domain->cache_tags, node) {
+		struct intel_iommu *iommu = tag->iommu;
+		struct device_domain_info *info;
+		/*
+		 * Per-tag copies: a nesting device-TLB tag widens the range to
+		 * everything, which must not leak into the tags that follow
+		 * it on the list.
+		 */
+		unsigned long tag_addr = addr, tag_mask = mask;
+		u16 sid;
+
+		switch (tag->type) {
+		case CACHE_TAG_IOTLB:
+		case CACHE_TAG_NESTING_IOTLB:
+			if (domain->use_first_level) {
+				qi_flush_piotlb(iommu, tag->domain_id,
+						tag->pasid, tag_addr, pages, ih);
+			} else {
+				/*
+				 * Fallback to domain selective flush if no
+				 * PSI support or the size is too big.
+				 */
+				if (!cap_pgsel_inv(iommu->cap) ||
+				    tag_mask > cap_max_amask_val(iommu->cap))
+					iommu->flush.flush_iotlb(iommu, tag->domain_id,
+								 0, 0, DMA_TLB_DSI_FLUSH);
+				else
+					iommu->flush.flush_iotlb(iommu, tag->domain_id,
+								 tag_addr | ih_bit, tag_mask,
+								 DMA_TLB_PSI_FLUSH);
+			}
+			break;
+		case CACHE_TAG_NESTING_DEVTLB:
+			/*
+			 * Address translation cache in device side caches the
+			 * result of nested translation. There is no easy way
+			 * to identify the exact set of nested translations
+			 * affected by a change in S2. So just flush the entire
+			 * device cache.
+			 */
+			tag_addr = 0;
+			tag_mask = MAX_AGAW_PFN_WIDTH;
+			fallthrough;
+		case CACHE_TAG_DEVTLB:
+			info = dev_iommu_priv_get(tag->dev);
+			sid = PCI_DEVID(info->bus, info->devfn);
+
+			if (tag->pasid == IOMMU_NO_PASID)
+				qi_flush_dev_iotlb(iommu, sid, info->pfsid,
+						   info->ats_qdep, tag_addr, tag_mask);
+			else
+				qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
+							 tag->pasid, info->ats_qdep,
+							 tag_addr, tag_mask);
+
+			quirk_extra_dev_tlb_flush(info, tag_addr, tag_mask,
+						  tag->pasid, info->ats_qdep);
+			break;
+		}
+
+		/* Trace the address and mask actually used for this tag. */
+		trace_cache_tag_flush_range(tag, start, end, tag_addr, pages, tag_mask);
+	}
+	spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
+
+/*
+ * Flush every cache tag of @domain without restricting the address range,
+ * for use when the domain's memory mappings have been modified.
+ */
+void cache_tag_flush_all(struct dmar_domain *domain)
+{
+	struct cache_tag *tag;
+	unsigned long flags;
+
+	spin_lock_irqsave(&domain->cache_lock, flags);
+	list_for_each_entry(tag, &domain->cache_tags, node) {
+		struct intel_iommu *iommu = tag->iommu;
+		bool is_iotlb = tag->type == CACHE_TAG_IOTLB ||
+				tag->type == CACHE_TAG_NESTING_IOTLB;
+		bool is_devtlb = tag->type == CACHE_TAG_DEVTLB ||
+				 tag->type == CACHE_TAG_NESTING_DEVTLB;
+
+		if (is_iotlb) {
+			if (!domain->use_first_level)
+				iommu->flush.flush_iotlb(iommu, tag->domain_id,
+							 0, 0, DMA_TLB_DSI_FLUSH);
+			else
+				qi_flush_piotlb(iommu, tag->domain_id,
+						tag->pasid, 0, -1, 0);
+		} else if (is_devtlb) {
+			struct device_domain_info *info;
+			u16 sid;
+
+			info = dev_iommu_priv_get(tag->dev);
+			sid = PCI_DEVID(info->bus, info->devfn);
+
+			/* Address 0 with the widest mask covers the whole device TLB. */
+			qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+					   0, MAX_AGAW_PFN_WIDTH);
+			quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
+						  IOMMU_NO_PASID, info->ats_qdep);
+		}
+
+		trace_cache_tag_flush_all(tag);
+	}
+	spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
+
+/*
+ * Flush for a range of IOVA, @start to @end, in which new (previously
+ * non-present) mappings have just been created in @domain.
+ *
+ * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
+ *   Set, any software updates to remapping structures other than first-
+ *   stage mapping requires explicit invalidation of the caches.
+ * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
+ *   write buffer flushing, software must explicitly perform write-buffer
+ *   flushing, if cache invalidation is not required.
+ */
+void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
+			      unsigned long end)
+{
+	unsigned long pages, mask, addr;
+	struct cache_tag *tag;
+	unsigned long flags;
+
+	addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+
+	spin_lock_irqsave(&domain->cache_lock, flags);
+	list_for_each_entry(tag, &domain->cache_tags, node) {
+		struct intel_iommu *iommu = tag->iommu;
+		bool use_psi;
+
+		/* No cache invalidation needed: only flush the write buffer. */
+		if (domain->use_first_level || !cap_caching_mode(iommu->cap)) {
+			iommu_flush_write_buffer(iommu);
+			continue;
+		}
+
+		switch (tag->type) {
+		case CACHE_TAG_IOTLB:
+		case CACHE_TAG_NESTING_IOTLB:
+			/*
+			 * Page-selective flush is only usable when the
+			 * hardware supports it and the mask is small enough;
+			 * otherwise fall back to a domain-selective flush.
+			 */
+			use_psi = cap_pgsel_inv(iommu->cap) &&
+				  mask <= cap_max_amask_val(iommu->cap);
+			if (use_psi)
+				iommu->flush.flush_iotlb(iommu, tag->domain_id,
+							 addr, mask,
+							 DMA_TLB_PSI_FLUSH);
+			else
+				iommu->flush.flush_iotlb(iommu, tag->domain_id,
+							 0, 0, DMA_TLB_DSI_FLUSH);
+			break;
+		default:
+			/* Device-TLB tags are not flushed for new mappings. */
+			break;
+		}
+
+		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
+	}
+	spin_unlock_irqrestore(&domain->cache_lock, flags);
+}
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index 86b506af7daa..affbf4a1558d 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -706,7 +706,6 @@ static ssize_t dmar_perf_latency_write(struct file *filp,
dmar_latency_disable(iommu, DMAR_LATENCY_INV_IOTLB);
dmar_latency_disable(iommu, DMAR_LATENCY_INV_DEVTLB);
dmar_latency_disable(iommu, DMAR_LATENCY_INV_IEC);
- dmar_latency_disable(iommu, DMAR_LATENCY_PRQ);
}
rcu_read_unlock();
break;
@@ -728,12 +727,6 @@ static ssize_t dmar_perf_latency_write(struct file *filp,
dmar_latency_enable(iommu, DMAR_LATENCY_INV_IEC);
rcu_read_unlock();
break;
- case 4:
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd)
- dmar_latency_enable(iommu, DMAR_LATENCY_PRQ);
- rcu_read_unlock();
- break;
default:
return -EINVAL;
}
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 36d7427b1202..304e84949ca7 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -32,6 +32,7 @@
#include "iommu.h"
#include "../irq_remapping.h"
+#include "../iommu-pages.h"
#include "perf.h"
#include "trace.h"
#include "perfmon.h"
@@ -1067,7 +1068,6 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
goto error_free_seq_id;
}
- err = -EINVAL;
if (!cap_sagaw(iommu->cap) &&
(!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
@@ -1187,7 +1187,7 @@ static void free_iommu(struct intel_iommu *iommu)
}
if (iommu->qi) {
- free_page((unsigned long)iommu->qi->desc);
+ iommu_free_page(iommu->qi->desc);
kfree(iommu->qi->desc_status);
kfree(iommu->qi);
}
@@ -1755,7 +1755,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
int dmar_enable_qi(struct intel_iommu *iommu)
{
struct q_inval *qi;
- struct page *desc_page;
+ void *desc;
+ int order;
if (!ecap_qis(iommu->ecap))
return -ENOENT;
@@ -1776,19 +1777,19 @@ int dmar_enable_qi(struct intel_iommu *iommu)
* Need two pages to accommodate 256 descriptors of 256 bits each
* if the remapping hardware supports scalable mode translation.
*/
- desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
- !!ecap_smts(iommu->ecap));
- if (!desc_page) {
+ order = ecap_smts(iommu->ecap) ? 1 : 0;
+ desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order);
+ if (!desc) {
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
}
- qi->desc = page_address(desc_page);
+ qi->desc = desc;
qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
if (!qi->desc_status) {
- free_page((unsigned long) qi->desc);
+ iommu_free_page(qi->desc);
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
@@ -2122,7 +2123,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
return ret;
}
-int __init enable_drhd_fault_handling(void)
+int enable_drhd_fault_handling(unsigned int cpu)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
@@ -2132,7 +2133,12 @@ int __init enable_drhd_fault_handling(void)
*/
for_each_iommu(iommu, drhd) {
u32 fault_status;
- int ret = dmar_set_interrupt(iommu);
+ int ret;
+
+ if (iommu->irq || iommu->node != cpu_to_node(cpu))
+ continue;
+
+ ret = dmar_set_interrupt(iommu);
if (ret) {
pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index a7ecd90303dc..2e9811bf2a4e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -27,6 +27,7 @@
#include "iommu.h"
#include "../dma-iommu.h"
#include "../irq_remapping.h"
+#include "../iommu-pages.h"
#include "pasid.h"
#include "cap_audit.h"
#include "perfmon.h"
@@ -54,11 +55,6 @@
__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN (1)
-
-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
@@ -221,12 +217,11 @@ int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
-static int dmar_map_gfx = 1;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int iommu_skip_te_disable;
+static int disable_igfx_iommu;
-#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
const struct iommu_ops intel_iommu_ops;
@@ -265,7 +260,7 @@ static int __init intel_iommu_setup(char *str)
no_platform_optin = 1;
pr_info("IOMMU disabled\n");
} else if (!strncmp(str, "igfx_off", 8)) {
- dmar_map_gfx = 0;
+ disable_igfx_iommu = 1;
pr_info("Disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
@@ -298,22 +293,6 @@ static int __init intel_iommu_setup(char *str)
}
__setup("intel_iommu=", intel_iommu_setup);
-void *alloc_pgtable_page(int node, gfp_t gfp)
-{
- struct page *page;
- void *vaddr = NULL;
-
- page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
- if (page)
- vaddr = page_address(page);
- return vaddr;
-}
-
-void free_pgtable_page(void *vaddr)
-{
- free_page((unsigned long)vaddr);
-}
-
static int domain_type_is_si(struct dmar_domain *domain)
{
return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
@@ -545,7 +524,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
if (!alloc)
return NULL;
- context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
+ context = iommu_alloc_page_node(iommu->node, GFP_ATOMIC);
if (!context)
return NULL;
@@ -719,17 +698,17 @@ static void free_context_table(struct intel_iommu *iommu)
for (i = 0; i < ROOT_ENTRY_NR; i++) {
context = iommu_context_addr(iommu, i, 0, 0);
if (context)
- free_pgtable_page(context);
+ iommu_free_page(context);
if (!sm_supported(iommu))
continue;
context = iommu_context_addr(iommu, i, 0x80, 0);
if (context)
- free_pgtable_page(context);
+ iommu_free_page(context);
}
- free_pgtable_page(iommu->root_entry);
+ iommu_free_page(iommu->root_entry);
iommu->root_entry = NULL;
}
@@ -865,9 +844,9 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
break;
if (!dma_pte_present(pte)) {
- uint64_t pteval;
+ uint64_t pteval, tmp;
- tmp_page = alloc_pgtable_page(domain->nid, gfp);
+ tmp_page = iommu_alloc_page_node(domain->nid, gfp);
if (!tmp_page)
return NULL;
@@ -877,9 +856,10 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
if (domain->use_first_level)
pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
- if (cmpxchg64(&pte->val, 0ULL, pteval))
+ tmp = 0ULL;
+ if (!try_cmpxchg64(&pte->val, &tmp, pteval))
/* Someone else set it while we were thinking; use theirs. */
- free_pgtable_page(tmp_page);
+ iommu_free_page(tmp_page);
else
domain_flush_cache(domain, pte, sizeof(*pte));
}
@@ -992,7 +972,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
last_pfn < level_pfn + level_size(level) - 1)) {
dma_clear_pte(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
- free_pgtable_page(level_pte);
+ iommu_free_page(level_pte);
}
next:
pfn += level_size(level);
@@ -1016,7 +996,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
- free_pgtable_page(domain->pgd);
+ iommu_free_page(domain->pgd);
domain->pgd = NULL;
}
}
@@ -1118,7 +1098,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- root = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
+ root = iommu_alloc_page_node(iommu->node, GFP_ATOMIC);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
@@ -1394,197 +1374,9 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep);
}
-static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
- u64 addr, unsigned mask)
-{
- struct dev_pasid_info *dev_pasid;
- struct device_domain_info *info;
- unsigned long flags;
-
- if (!domain->has_iotlb_device)
- return;
-
- spin_lock_irqsave(&domain->lock, flags);
- list_for_each_entry(info, &domain->devices, link)
- __iommu_flush_dev_iotlb(info, addr, mask);
-
- list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
- info = dev_iommu_priv_get(dev_pasid->dev);
-
- if (!info->ats_enabled)
- continue;
-
- qi_flush_dev_iotlb_pasid(info->iommu,
- PCI_DEVID(info->bus, info->devfn),
- info->pfsid, dev_pasid->pasid,
- info->ats_qdep, addr,
- mask);
- }
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-
-static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
- struct dmar_domain *domain, u64 addr,
- unsigned long npages, bool ih)
-{
- u16 did = domain_id_iommu(domain, iommu);
- struct dev_pasid_info *dev_pasid;
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
- list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
- qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih);
-
- if (!list_empty(&domain->devices))
- qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih);
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-
-static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
- unsigned long pfn, unsigned int pages,
- int ih)
-{
- unsigned int aligned_pages = __roundup_pow_of_two(pages);
- unsigned long bitmask = aligned_pages - 1;
- unsigned int mask = ilog2(aligned_pages);
- u64 addr = (u64)pfn << VTD_PAGE_SHIFT;
-
- /*
- * PSI masks the low order bits of the base address. If the
- * address isn't aligned to the mask, then compute a mask value
- * needed to ensure the target range is flushed.
- */
- if (unlikely(bitmask & pfn)) {
- unsigned long end_pfn = pfn + pages - 1, shared_bits;
-
- /*
- * Since end_pfn <= pfn + bitmask, the only way bits
- * higher than bitmask can differ in pfn and end_pfn is
- * by carrying. This means after masking out bitmask,
- * high bits starting with the first set bit in
- * shared_bits are all equal in both pfn and end_pfn.
- */
- shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
- mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
- }
-
- /*
- * Fallback to domain selective flush if no PSI support or
- * the size is too big.
- */
- if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
- iommu->flush.flush_iotlb(iommu, did, 0, 0,
- DMA_TLB_DSI_FLUSH);
- else
- iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
- DMA_TLB_PSI_FLUSH);
-}
-
-static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
- struct dmar_domain *domain,
- unsigned long pfn, unsigned int pages,
- int ih, int map)
-{
- unsigned int aligned_pages = __roundup_pow_of_two(pages);
- unsigned int mask = ilog2(aligned_pages);
- uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
- u16 did = domain_id_iommu(domain, iommu);
-
- if (WARN_ON(!pages))
- return;
-
- if (ih)
- ih = 1 << 6;
-
- if (domain->use_first_level)
- domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
- else
- __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih);
-
- /*
- * In caching mode, changes of pages from non-present to present require
- * flush. However, device IOTLB doesn't need to be flushed in this case.
- */
- if (!cap_caching_mode(iommu->cap) || !map)
- iommu_flush_dev_iotlb(domain, addr, mask);
-}
-
-/* Notification for newly created mappings */
-static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain *domain,
- unsigned long pfn, unsigned int pages)
-{
- /*
- * It's a non-present to present mapping. Only flush if caching mode
- * and second level.
- */
- if (cap_caching_mode(iommu->cap) && !domain->use_first_level)
- iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
- else
- iommu_flush_write_buffer(iommu);
-}
-
-/*
- * Flush the relevant caches in nested translation if the domain
- * also serves as a parent
- */
-static void parent_domain_flush(struct dmar_domain *domain,
- unsigned long pfn,
- unsigned long pages, int ih)
-{
- struct dmar_domain *s1_domain;
-
- spin_lock(&domain->s1_lock);
- list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
- struct device_domain_info *device_info;
- struct iommu_domain_info *info;
- unsigned long flags;
- unsigned long i;
-
- xa_for_each(&s1_domain->iommu_array, i, info)
- __iommu_flush_iotlb_psi(info->iommu, info->did,
- pfn, pages, ih);
-
- if (!s1_domain->has_iotlb_device)
- continue;
-
- spin_lock_irqsave(&s1_domain->lock, flags);
- list_for_each_entry(device_info, &s1_domain->devices, link)
- /*
- * Address translation cache in device side caches the
- * result of nested translation. There is no easy way
- * to identify the exact set of nested translations
- * affected by a change in S2. So just flush the entire
- * device cache.
- */
- __iommu_flush_dev_iotlb(device_info, 0,
- MAX_AGAW_PFN_WIDTH);
- spin_unlock_irqrestore(&s1_domain->lock, flags);
- }
- spin_unlock(&domain->s1_lock);
-}
-
static void intel_flush_iotlb_all(struct iommu_domain *domain)
{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- struct iommu_domain_info *info;
- unsigned long idx;
-
- xa_for_each(&dmar_domain->iommu_array, idx, info) {
- struct intel_iommu *iommu = info->iommu;
- u16 did = domain_id_iommu(dmar_domain, iommu);
-
- if (dmar_domain->use_first_level)
- domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0);
- else
- iommu->flush.flush_iotlb(iommu, did, 0, 0,
- DMA_TLB_DSI_FLUSH);
-
- if (!cap_caching_mode(iommu->cap))
- iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH);
- }
-
- if (dmar_domain->nested_parent)
- parent_domain_flush(dmar_domain, 0, -1, 0);
+ cache_tag_flush_all(to_dmar_domain(domain));
}
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1750,7 +1542,9 @@ static struct dmar_domain *alloc_domain(unsigned int type)
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
INIT_LIST_HEAD(&domain->dev_pasids);
+ INIT_LIST_HEAD(&domain->cache_tags);
spin_lock_init(&domain->lock);
+ spin_lock_init(&domain->cache_lock);
xa_init(&domain->iommu_array);
return domain;
@@ -1762,6 +1556,9 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
unsigned long ndomains;
int num, ret = -ENOSPC;
+ if (domain->domain.type == IOMMU_DOMAIN_SVA)
+ return 0;
+
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
@@ -1809,6 +1606,9 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info;
+ if (domain->domain.type == IOMMU_DOMAIN_SVA)
+ return;
+
spin_lock(&iommu->lock);
info = xa_load(&domain->iommu_array, iommu->seq_id);
if (--info->refcnt == 0) {
@@ -1841,7 +1641,7 @@ static void domain_exit(struct dmar_domain *domain)
LIST_HEAD(freelist);
domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
- put_pages_list(&freelist);
+ iommu_put_pages_list(&freelist);
}
if (WARN_ON(!list_empty(&domain->devices)))
@@ -1988,13 +1788,6 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
domain_context_mapping_cb, domain);
}
-/* Returns a number of VTD pages, but aligned to MM page size */
-static unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
-{
- host_addr &= ~PAGE_MASK;
- return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
-}
-
/* Return largest possible superpage level for a given mapping */
static int hardware_largepage_caps(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long phy_pfn, unsigned long pages)
@@ -2031,9 +1824,7 @@ static void switch_to_super_page(struct dmar_domain *domain,
unsigned long end_pfn, int level)
{
unsigned long lvl_pages = lvl_to_nr_pages(level);
- struct iommu_domain_info *info;
struct dma_pte *pte = NULL;
- unsigned long i;
while (start_pfn <= end_pfn) {
if (!pte)
@@ -2045,13 +1836,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
start_pfn + lvl_pages - 1,
level + 1);
- xa_for_each(&domain->iommu_array, i, info)
- iommu_flush_iotlb_psi(info->iommu, domain,
- start_pfn, lvl_pages,
- 0, 0);
- if (domain->nested_parent)
- parent_domain_flush(domain, start_pfn,
- lvl_pages, 0);
+ cache_tag_flush_range(domain, start_pfn << VTD_PAGE_SHIFT,
+ end_pfn << VTD_PAGE_SHIFT, 0);
}
pte++;
@@ -2128,8 +1914,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
/* We don't need lock here, nobody else
* touches the iova range
*/
- tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
- if (tmp) {
+ tmp = 0ULL;
+ if (!try_cmpxchg64_local(&pte->val, &tmp, pteval)) {
static int dumps = 5;
pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
iov_pfn, tmp, (unsigned long long)pteval);
@@ -2327,6 +2113,13 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
ret = domain_attach_iommu(domain, iommu);
if (ret)
return ret;
+
+ ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
+ if (ret) {
+ domain_detach_iommu(domain, iommu);
+ return ret;
+ }
+
info->domain = domain;
spin_lock_irqsave(&domain->lock, flags);
list_add(&info->link, &domain->devices);
@@ -2402,9 +2195,6 @@ static int device_def_domain_type(struct device *dev)
if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
return IOMMU_DOMAIN_IDENTITY;
-
- if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
- return IOMMU_DOMAIN_IDENTITY;
}
return 0;
@@ -2497,7 +2287,7 @@ static int copy_context_table(struct intel_iommu *iommu,
if (!old_ce)
goto out;
- new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
+ new_ce = iommu_alloc_page_node(iommu->node, GFP_KERNEL);
if (!new_ce)
goto out_unmap;
@@ -2705,9 +2495,6 @@ static int __init init_dmars(void)
iommu_set_root_entry(iommu);
}
- if (!dmar_map_gfx)
- iommu_identity_mapping |= IDENTMAP_GFX;
-
check_tylersburg_isoch();
ret = si_domain_init(hw_pass_through);
@@ -2798,7 +2585,7 @@ static void __init init_no_remapping_devices(void)
/* This IOMMU has *only* gfx devices. Either bypass it or
set the gfx_mapped flag, as appropriate */
drhd->gfx_dedicated = 1;
- if (!dmar_map_gfx)
+ if (disable_igfx_iommu)
drhd->ignored = 1;
}
}
@@ -3414,19 +3201,10 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
case MEM_OFFLINE:
case MEM_CANCEL_ONLINE:
{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
LIST_HEAD(freelist);
domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
-
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd)
- iommu_flush_iotlb_psi(iommu, si_domain,
- start_vpfn, mhp->nr_pages,
- list_empty(&freelist), 0);
- rcu_read_unlock();
- put_pages_list(&freelist);
+ iommu_put_pages_list(&freelist);
}
break;
}
@@ -3815,6 +3593,7 @@ void device_block_translation(struct device *dev)
list_del(&info->link);
spin_unlock_irqrestore(&info->domain->lock, flags);
+ cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
domain_detach_iommu(info->domain, iommu);
info->domain = NULL;
}
@@ -3833,7 +3612,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->max_addr = 0;
/* always allocate the top pgd */
- domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
+ domain->pgd = iommu_alloc_page_node(domain->nid, GFP_ATOMIC);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -3882,8 +3661,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
return domain;
case IOMMU_DOMAIN_IDENTITY:
return &si_domain->domain;
- case IOMMU_DOMAIN_SVA:
- return intel_svm_domain_alloc();
default:
return NULL;
}
@@ -3987,7 +3764,7 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
pte = dmar_domain->pgd;
if (dma_pte_present(pte)) {
dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
- free_pgtable_page(pte);
+ iommu_free_page(pte);
}
dmar_domain->agaw--;
}
@@ -4122,26 +3899,9 @@ static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
static void intel_iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- unsigned long iova_pfn = IOVA_PFN(gather->start);
- size_t size = gather->end - gather->start;
- struct iommu_domain_info *info;
- unsigned long start_pfn;
- unsigned long nrpages;
- unsigned long i;
-
- nrpages = aligned_nrpages(gather->start, size);
- start_pfn = mm_to_dma_pfn_start(iova_pfn);
-
- xa_for_each(&dmar_domain->iommu_array, i, info)
- iommu_flush_iotlb_psi(info->iommu, dmar_domain,
- start_pfn, nrpages,
- list_empty(&gather->freelist), 0);
-
- if (dmar_domain->nested_parent)
- parent_domain_flush(dmar_domain, start_pfn, nrpages,
- list_empty(&gather->freelist));
- put_pages_list(&gather->freelist);
+ cache_tag_flush_range(to_dmar_domain(domain), gather->start,
+ gather->end, list_empty(&gather->freelist));
+ iommu_put_pages_list(&gather->freelist);
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -4352,12 +4112,6 @@ static void intel_iommu_release_device(struct device *dev)
set_dma_ops(dev, NULL);
}
-static void intel_iommu_probe_finalize(struct device *dev)
-{
- set_dma_ops(dev, NULL);
- iommu_setup_dma_ops(dev, 0, U64_MAX);
-}
-
static void intel_iommu_get_resv_regions(struct device *device,
struct list_head *head)
{
@@ -4579,41 +4333,20 @@ static bool risky_device(struct pci_dev *pdev)
static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- unsigned long pages = aligned_nrpages(iova, size);
- unsigned long pfn = iova >> VTD_PAGE_SHIFT;
- struct iommu_domain_info *info;
- unsigned long i;
+ cache_tag_flush_range_np(to_dmar_domain(domain), iova, iova + size - 1);
- xa_for_each(&dmar_domain->iommu_array, i, info)
- __mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
return 0;
}
-static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
+ struct iommu_domain *domain)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct dev_pasid_info *curr, *dev_pasid = NULL;
struct intel_iommu *iommu = info->iommu;
- struct dmar_domain *dmar_domain;
- struct iommu_domain *domain;
unsigned long flags;
- domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
- if (WARN_ON_ONCE(!domain))
- goto out_tear_down;
-
- /*
- * The SVA implementation needs to handle its own stuffs like the mm
- * notification. Before consolidating that code into iommu core, let
- * the intel sva code handle it.
- */
- if (domain->type == IOMMU_DOMAIN_SVA) {
- intel_svm_remove_dev_pasid(dev, pasid);
- goto out_tear_down;
- }
-
- dmar_domain = to_dmar_domain(domain);
spin_lock_irqsave(&dmar_domain->lock, flags);
list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) {
if (curr->dev == dev && curr->pasid == pasid) {
@@ -4625,10 +4358,10 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
WARN_ON_ONCE(!dev_pasid);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ cache_tag_unassign_domain(dmar_domain, dev, pasid);
domain_detach_iommu(dmar_domain, iommu);
intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
kfree(dev_pasid);
-out_tear_down:
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
intel_drain_pasid_prq(dev, pasid);
}
@@ -4664,6 +4397,10 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (ret)
goto out_free;
+ ret = cache_tag_assign_domain(dmar_domain, dev, pasid);
+ if (ret)
+ goto out_detach_iommu;
+
if (domain_type_is_si(dmar_domain))
ret = intel_pasid_setup_pass_through(iommu, dev, pasid);
else if (dmar_domain->use_first_level)
@@ -4673,7 +4410,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
ret = intel_pasid_setup_second_level(iommu, dmar_domain,
dev, pasid);
if (ret)
- goto out_detach_iommu;
+ goto out_unassign_tag;
dev_pasid->dev = dev;
dev_pasid->pasid = pasid;
@@ -4685,6 +4422,8 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
intel_iommu_debugfs_create_dev_pasid(dev_pasid);
return 0;
+out_unassign_tag:
+ cache_tag_unassign_domain(dmar_domain, dev, pasid);
out_detach_iommu:
domain_detach_iommu(dmar_domain, iommu);
out_free:
@@ -4841,8 +4580,8 @@ const struct iommu_ops intel_iommu_ops = {
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
.domain_alloc_user = intel_iommu_domain_alloc_user,
+ .domain_alloc_sva = intel_svm_domain_alloc,
.probe_device = intel_iommu_probe_device,
- .probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
.device_group = intel_iommu_device_group,
@@ -4875,7 +4614,7 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
return;
pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
- dmar_map_gfx = 0;
+ disable_igfx_iommu = 1;
}
/* G4x/GM45 integrated gfx dmar support is totally busted. */
@@ -4956,8 +4695,8 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
- dmar_map_gfx = 0;
- } else if (dmar_map_gfx) {
+ disable_igfx_iommu = 1;
+ } else if (!disable_igfx_iommu) {
/* we have to ensure the gfx device is idle before we flush */
pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
iommu_set_dma_strict();
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 404d2476a877..eaf015b4353b 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -35,6 +35,8 @@
#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+
#define VTD_STRIDE_SHIFT (9)
#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
@@ -455,7 +457,6 @@ enum {
/* Page group response descriptor QW0 */
#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4)
-#define QI_PGRP_PDP(p) (((u64)(p)) << 5)
#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12)
#define QI_PGRP_DID(rid) (((u64)(rid)) << 16)
#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32)
@@ -607,6 +608,9 @@ struct dmar_domain {
struct list_head devices; /* all devices' list */
struct list_head dev_pasids; /* all attached pasids */
+ spinlock_t cache_lock; /* Protect the cache tag list */
+ struct list_head cache_tags; /* Cache tag list */
+
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
@@ -644,6 +648,11 @@ struct dmar_domain {
/* link to parent domain siblings */
struct list_head s2_link;
};
+
+ /* SVA domain */
+ struct {
+ struct mmu_notifier notifier;
+ };
};
struct iommu_domain domain; /* generic domain data structure for
@@ -1038,6 +1047,19 @@ static inline void context_set_sm_pre(struct context_entry *context)
context->lo |= BIT_ULL(4);
}
+/* Returns a number of VTD pages, but aligned to MM page size */
+static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
+{
+ host_addr &= ~PAGE_MASK;
+ return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
+}
+
+/* Return a size from number of VTD pages. */
+static inline unsigned long nrpages_to_size(unsigned long npages)
+{
+ return npages << VTD_PAGE_SHIFT;
+}
+
/* Convert value to context PASID directory size field coding. */
#define context_pdts(pds) (((pds) & 0x7) << 9)
@@ -1085,48 +1107,60 @@ void domain_update_iommu_cap(struct dmar_domain *domain);
int dmar_ir_support(void);
-void *alloc_pgtable_page(int node, gfp_t gfp);
-void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
const struct iommu_user_data *user_data);
struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid);
+enum cache_tag_type {
+ CACHE_TAG_IOTLB,
+ CACHE_TAG_DEVTLB,
+ CACHE_TAG_NESTING_IOTLB,
+ CACHE_TAG_NESTING_DEVTLB,
+};
+
+struct cache_tag {
+ struct list_head node;
+ enum cache_tag_type type;
+ struct intel_iommu *iommu;
+ /*
+ * The @dev field represents the location of the cache. For IOTLB, it
+ * resides on the IOMMU hardware. @dev stores the device pointer to
+ * the IOMMU hardware. For DevTLB, it resides in the PCIe endpoint.
+ * @dev stores the device pointer to that endpoint.
+ */
+ struct device *dev;
+ u16 domain_id;
+ ioasid_t pasid;
+ unsigned int users;
+};
+
+int cache_tag_assign_domain(struct dmar_domain *domain,
+ struct device *dev, ioasid_t pasid);
+void cache_tag_unassign_domain(struct dmar_domain *domain,
+ struct device *dev, ioasid_t pasid);
+void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
+ unsigned long end, int ih);
+void cache_tag_flush_all(struct dmar_domain *domain);
+void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
+ unsigned long end);
+
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
int intel_svm_enable_prq(struct intel_iommu *iommu);
int intel_svm_finish_prq(struct intel_iommu *iommu);
void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg);
-struct iommu_domain *intel_svm_domain_alloc(void);
-void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid);
+struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
+ struct mm_struct *mm);
void intel_drain_pasid_prq(struct device *dev, u32 pasid);
-
-struct intel_svm_dev {
- struct list_head list;
- struct rcu_head rcu;
- struct device *dev;
- struct intel_iommu *iommu;
- u16 did;
- u16 sid, qdep;
-};
-
-struct intel_svm {
- struct mmu_notifier notifier;
- struct mm_struct *mm;
- u32 pasid;
- struct list_head devs;
-};
#else
static inline void intel_svm_check(struct intel_iommu *iommu) {}
static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {}
-static inline struct iommu_domain *intel_svm_domain_alloc(void)
-{
- return NULL;
-}
-
-static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+static inline struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
+ struct mm_struct *mm)
{
+ return ERR_PTR(-ENODEV);
}
#endif
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 566297bc87dd..e4a70886678c 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -19,9 +19,11 @@
#include <asm/cpu.h>
#include <asm/irq_remapping.h>
#include <asm/pci-direct.h>
+#include <asm/posted_intr.h>
#include "iommu.h"
#include "../irq_remapping.h"
+#include "../iommu-pages.h"
#include "cap_audit.h"
enum irq_mode {
@@ -49,6 +51,7 @@ struct irq_2_iommu {
u16 sub_handle;
u8 irte_mask;
enum irq_mode mode;
+ bool posted_msi;
};
struct intel_ir_data {
@@ -82,7 +85,7 @@ static const struct irq_domain_ops intel_ir_domain_ops;
static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
static int __init parse_ioapics_under_ir(void);
-static const struct msi_parent_ops dmar_msi_parent_ops, virt_dmar_msi_parent_ops;
+static const struct msi_parent_ops dmar_msi_parent_ops;
static bool ir_pre_enabled(struct intel_iommu *iommu)
{
@@ -527,7 +530,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
struct ir_table *ir_table;
struct fwnode_handle *fn;
unsigned long *bitmap;
- struct page *pages;
+ void *ir_table_base;
if (iommu->ir_table)
return 0;
@@ -536,9 +539,9 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
if (!ir_table)
return -ENOMEM;
- pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
- INTR_REMAP_PAGE_ORDER);
- if (!pages) {
+ ir_table_base = iommu_alloc_pages_node(iommu->node, GFP_KERNEL,
+ INTR_REMAP_PAGE_ORDER);
+ if (!ir_table_base) {
pr_err("IR%d: failed to allocate pages of order %d\n",
iommu->seq_id, INTR_REMAP_PAGE_ORDER);
goto out_free_table;
@@ -567,13 +570,9 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR);
iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
IRQ_DOMAIN_FLAG_ISOLATED_MSI;
+ iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops;
- if (cap_caching_mode(iommu->cap))
- iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops;
- else
- iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops;
-
- ir_table->base = page_address(pages);
+ ir_table->base = ir_table_base;
ir_table->bitmap = bitmap;
iommu->ir_table = ir_table;
@@ -622,7 +621,7 @@ out_free_fwnode:
out_free_bitmap:
bitmap_free(bitmap);
out_free_pages:
- __free_pages(pages, INTR_REMAP_PAGE_ORDER);
+ iommu_free_pages(ir_table_base, INTR_REMAP_PAGE_ORDER);
out_free_table:
kfree(ir_table);
@@ -643,8 +642,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
irq_domain_free_fwnode(fn);
iommu->ir_domain = NULL;
}
- free_pages((unsigned long)iommu->ir_table->base,
- INTR_REMAP_PAGE_ORDER);
+ iommu_free_pages(iommu->ir_table->base, INTR_REMAP_PAGE_ORDER);
bitmap_free(iommu->ir_table->bitmap);
kfree(iommu->ir_table);
iommu->ir_table = NULL;
@@ -1118,6 +1116,14 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
irte->redir_hint = 1;
}
+static void prepare_irte_posted(struct irte *irte)
+{
+ memset(irte, 0, sizeof(*irte));
+
+ irte->present = 1;
+ irte->p_pst = 1;
+}
+
struct irq_remap_ops intel_irq_remap_ops = {
.prepare = intel_prepare_irq_remapping,
.enable = intel_enable_irq_remapping,
@@ -1126,6 +1132,47 @@ struct irq_remap_ops intel_irq_remap_ops = {
.enable_faulting = enable_drhd_fault_handling,
};
+#ifdef CONFIG_X86_POSTED_MSI
+
+static phys_addr_t get_pi_desc_addr(struct irq_data *irqd)
+{
+ int cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
+
+ if (WARN_ON(cpu >= nr_cpu_ids))
+ return 0;
+
+ return __pa(per_cpu_ptr(&posted_msi_pi_desc, cpu));
+}
+
+/*
+ * Re-point the IRTE of a posted MSI at the posted-interrupt descriptor whose
+ * address get_pi_desc_addr() returns for @irqd.
+ *
+ * A local IRTE is built from the entry cached in the interrupt's chip data,
+ * completed with the descriptor address and handed to modify_irte(). If
+ * get_pi_desc_addr() returns 0, a warning is logged and modify_irte() is not
+ * called.
+ */
+static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd)
+{
+	struct intel_ir_data *ir_data = irqd->chip_data;
+	struct irte *irte = &ir_data->irte_entry;
+	struct irte irte_pi;
+	u64 pid_addr;
+
+	pid_addr = get_pi_desc_addr(irqd);
+
+	if (!pid_addr) {
+		/* Terminate the message so it is not merged with the next log line */
+		pr_warn("Failed to setup IRQ %d for posted mode\n", irqd->irq);
+		return;
+	}
+
+	memset(&irte_pi, 0, sizeof(irte_pi));
+
+	/* The shared IRTE has already been set up as posted during alloc_irte */
+	dmar_copy_shared_irte(&irte_pi, irte);
+
+	/* Split the descriptor address across the low and high PDA fields */
+	irte_pi.pda_l = (pid_addr >> (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT);
+	irte_pi.pda_h = (pid_addr >> 32) & ~(-1UL << PDA_HIGH_BIT);
+
+	modify_irte(&ir_data->irq_2_iommu, &irte_pi);
+}
+
+#else
+static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {}
+#endif
+
static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
{
struct intel_ir_data *ir_data = irqd->chip_data;
@@ -1139,8 +1186,9 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
irte->vector = cfg->vector;
irte->dest_id = IRTE_DEST(cfg->dest_apicid);
- /* Update the hardware only if the interrupt is in remapped mode. */
- if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
+ if (ir_data->irq_2_iommu.posted_msi)
+ intel_ir_reconfigure_irte_posted(irqd);
+ else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
modify_irte(&ir_data->irq_2_iommu, irte);
}
@@ -1194,7 +1242,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
struct intel_ir_data *ir_data = data->chip_data;
struct vcpu_data *vcpu_pi_info = info;
- /* stop posting interrupts, back to remapping mode */
+ /* stop posting interrupts, back to the default mode */
if (!vcpu_pi_info) {
modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
} else {
@@ -1233,6 +1281,49 @@ static struct irq_chip intel_ir_chip = {
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
};
+/*
+ * With posted MSIs, all vectors are multiplexed into a single notification
+ * vector. Device MSIs are then dispatched in a demux loop where
+ * EOIs can be coalesced as well.
+ *
+ * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack()
+ * function. Instead EOI is performed by the posted interrupt notification
+ * handler.
+ *
+ * For the example below, 3 MSIs are coalesced into one CPU notification. Only
+ * one apic_eoi() is needed.
+ *
+ * __sysvec_posted_msi_notification()
+ * irq_enter();
+ * handle_edge_irq()
+ * irq_chip_ack_parent()
+ * dummy(); // No EOI
+ * handle_irq_event()
+ * driver_handler()
+ * handle_edge_irq()
+ * irq_chip_ack_parent()
+ * dummy(); // No EOI
+ * handle_irq_event()
+ * driver_handler()
+ * handle_edge_irq()
+ * irq_chip_ack_parent()
+ * dummy(); // No EOI
+ * handle_irq_event()
+ * driver_handler()
+ * apic_eoi()
+ * irq_exit()
+ */
+
+static void dummy_ack(struct irq_data *d) { }
+
+static struct irq_chip intel_ir_chip_post_msi = {
+ .name = "INTEL-IR-POST",
+ .irq_ack = dummy_ack,
+ .irq_set_affinity = intel_ir_set_affinity,
+ .irq_compose_msi_msg = intel_ir_compose_msi_msg,
+ .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
+};
+
static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle)
{
memset(msg, 0, sizeof(*msg));
@@ -1274,6 +1365,11 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
break;
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
+ if (posted_msi_supported()) {
+ prepare_irte_posted(irte);
+ data->irq_2_iommu.posted_msi = 1;
+ }
+
set_msi_sid(irte,
pci_real_dma_dev(msi_desc_to_pci_dev(info->desc)));
break;
@@ -1361,7 +1457,12 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,
irq_data->hwirq = (index << 16) + i;
irq_data->chip_data = ird;
- irq_data->chip = &intel_ir_chip;
+ if (posted_msi_supported() &&
+ ((info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) ||
+ (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX)))
+ irq_data->chip = &intel_ir_chip_post_msi;
+ else
+ irq_data->chip = &intel_ir_chip;
intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i);
irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
}
@@ -1421,20 +1522,11 @@ static const struct irq_domain_ops intel_ir_domain_ops = {
};
static const struct msi_parent_ops dmar_msi_parent_ops = {
- .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED |
- MSI_FLAG_MULTI_PCI_MSI |
- MSI_FLAG_PCI_IMS,
+ .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI,
.prefix = "IR-",
.init_dev_msi_info = msi_parent_init_dev_msi_info,
};
-static const struct msi_parent_ops virt_dmar_msi_parent_ops = {
- .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED |
- MSI_FLAG_MULTI_PCI_MSI,
- .prefix = "vIR-",
- .init_dev_msi_info = msi_parent_init_dev_msi_info,
-};
-
/*
* Support of Interrupt Remapping Unit Hotplug
*/
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index a7d68f3d518a..16a2bcf5cfeb 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -52,13 +52,14 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
return ret;
}
+ ret = cache_tag_assign_domain(dmar_domain, dev, IOMMU_NO_PASID);
+ if (ret)
+ goto detach_iommu;
+
ret = intel_pasid_setup_nested(iommu, dev,
IOMMU_NO_PASID, dmar_domain);
- if (ret) {
- domain_detach_iommu(dmar_domain, iommu);
- dev_err_ratelimited(dev, "Failed to setup pasid entry\n");
- return ret;
- }
+ if (ret)
+ goto unassign_tag;
info->domain = dmar_domain;
spin_lock_irqsave(&dmar_domain->lock, flags);
@@ -68,6 +69,12 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
domain_update_iotlb(dmar_domain);
return 0;
+unassign_tag:
+ cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID);
+detach_iommu:
+ domain_detach_iommu(dmar_domain, iommu);
+
+ return ret;
}
static void intel_nested_domain_free(struct iommu_domain *domain)
@@ -81,50 +88,6 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
kfree(dmar_domain);
}
-static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr,
- unsigned int mask)
-{
- struct device_domain_info *info;
- unsigned long flags;
- u16 sid, qdep;
-
- spin_lock_irqsave(&domain->lock, flags);
- list_for_each_entry(info, &domain->devices, link) {
- if (!info->ats_enabled)
- continue;
- sid = info->bus << 8 | info->devfn;
- qdep = info->ats_qdep;
- qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
- qdep, addr, mask);
- quirk_extra_dev_tlb_flush(info, addr, mask,
- IOMMU_NO_PASID, qdep);
- }
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-
-static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr,
- u64 npages, bool ih)
-{
- struct iommu_domain_info *info;
- unsigned int mask;
- unsigned long i;
-
- xa_for_each(&domain->iommu_array, i, info)
- qi_flush_piotlb(info->iommu,
- domain_id_iommu(domain, info->iommu),
- IOMMU_NO_PASID, addr, npages, ih);
-
- if (!domain->has_iotlb_device)
- return;
-
- if (npages == U64_MAX)
- mask = 64 - VTD_PAGE_SHIFT;
- else
- mask = ilog2(__roundup_pow_of_two(npages));
-
- nested_flush_dev_iotlb(domain, addr, mask);
-}
-
static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
struct iommu_user_data_array *array)
{
@@ -157,9 +120,9 @@ static int intel_nested_cache_invalidate_user(struct iommu_domain *domain,
break;
}
- intel_nested_flush_cache(dmar_domain, inv_entry.addr,
- inv_entry.npages,
- inv_entry.flags & IOMMU_VTD_INV_FLAGS_LEAF);
+ cache_tag_flush_range(dmar_domain, inv_entry.addr,
+ inv_entry.addr + nrpages_to_size(inv_entry.npages) - 1,
+ inv_entry.flags & IOMMU_VTD_INV_FLAGS_LEAF);
processed++;
}
@@ -206,7 +169,9 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
domain->domain.type = IOMMU_DOMAIN_NESTED;
INIT_LIST_HEAD(&domain->devices);
INIT_LIST_HEAD(&domain->dev_pasids);
+ INIT_LIST_HEAD(&domain->cache_tags);
spin_lock_init(&domain->lock);
+ spin_lock_init(&domain->cache_lock);
xa_init(&domain->iommu_array);
spin_lock(&s2_domain->s1_lock);
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 11f0b856d74c..abce19e2ad6f 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -20,6 +20,7 @@
#include "iommu.h"
#include "pasid.h"
+#include "../iommu-pages.h"
/*
* Intel IOMMU system wide PASID name space:
@@ -38,7 +39,7 @@ int intel_pasid_alloc_table(struct device *dev)
{
struct device_domain_info *info;
struct pasid_table *pasid_table;
- struct page *pages;
+ struct pasid_dir_entry *dir;
u32 max_pasid = 0;
int order, size;
@@ -59,14 +60,13 @@ int intel_pasid_alloc_table(struct device *dev)
size = max_pasid >> (PASID_PDE_SHIFT - 3);
order = size ? get_order(size) : 0;
- pages = alloc_pages_node(info->iommu->node,
- GFP_KERNEL | __GFP_ZERO, order);
- if (!pages) {
+ dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order);
+ if (!dir) {
kfree(pasid_table);
return -ENOMEM;
}
- pasid_table->table = page_address(pages);
+ pasid_table->table = dir;
pasid_table->order = order;
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
@@ -97,10 +97,10 @@ void intel_pasid_free_table(struct device *dev)
max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
for (i = 0; i < max_pde; i++) {
table = get_pasid_table_from_pde(&dir[i]);
- free_pgtable_page(table);
+ iommu_free_page(table);
}
- free_pages((unsigned long)pasid_table->table, pasid_table->order);
+ iommu_free_pages(pasid_table->table, pasid_table->order);
kfree(pasid_table);
}
@@ -146,7 +146,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
retry:
entries = get_pasid_table_from_pde(&dir[dir_index]);
if (!entries) {
- entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
+ entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC);
if (!entries)
return NULL;
@@ -158,7 +158,7 @@ retry:
*/
if (cmpxchg64(&dir[dir_index].val, 0ULL,
(u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
- free_pgtable_page(entries);
+ iommu_free_page(entries);
goto retry;
}
if (!ecap_coherent(info->iommu->ecap)) {
diff --git a/drivers/iommu/intel/perf.h b/drivers/iommu/intel/perf.h
index fd6db8049d1a..df9a36942d64 100644
--- a/drivers/iommu/intel/perf.h
+++ b/drivers/iommu/intel/perf.h
@@ -11,7 +11,6 @@ enum latency_type {
DMAR_LATENCY_INV_IOTLB = 0,
DMAR_LATENCY_INV_DEVTLB,
DMAR_LATENCY_INV_IEC,
- DMAR_LATENCY_PRQ,
DMAR_LATENCY_NUM
};
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index ee3b469e2da1..0e3a9b38bef2 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -22,57 +22,22 @@
#include "iommu.h"
#include "pasid.h"
#include "perf.h"
+#include "../iommu-pages.h"
#include "trace.h"
static irqreturn_t prq_event_thread(int irq, void *d);
-static DEFINE_XARRAY_ALLOC(pasid_private_array);
-static int pasid_private_add(ioasid_t pasid, void *priv)
-{
- return xa_alloc(&pasid_private_array, &pasid, priv,
- XA_LIMIT(pasid, pasid), GFP_ATOMIC);
-}
-
-static void pasid_private_remove(ioasid_t pasid)
-{
- xa_erase(&pasid_private_array, pasid);
-}
-
-static void *pasid_private_find(ioasid_t pasid)
-{
- return xa_load(&pasid_private_array, pasid);
-}
-
-static struct intel_svm_dev *
-svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
-{
- struct intel_svm_dev *sdev = NULL, *t;
-
- rcu_read_lock();
- list_for_each_entry_rcu(t, &svm->devs, list) {
- if (t->dev == dev) {
- sdev = t;
- break;
- }
- }
- rcu_read_unlock();
-
- return sdev;
-}
-
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
struct iopf_queue *iopfq;
- struct page *pages;
int irq, ret;
- pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
- if (!pages) {
+ iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
+ if (!iommu->prq) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
return -ENOMEM;
}
- iommu->prq = page_address(pages);
irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
if (irq <= 0) {
@@ -117,7 +82,7 @@ free_hwirq:
dmar_free_hwirq(irq);
iommu->pr_irq = 0;
free_prq:
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return ret;
@@ -140,7 +105,7 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
iommu->iopf_queue = NULL;
}
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return 0;
@@ -168,94 +133,32 @@ void intel_svm_check(struct intel_iommu *iommu)
iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}
-static void __flush_svm_range_dev(struct intel_svm *svm,
- struct intel_svm_dev *sdev,
- unsigned long address,
- unsigned long pages, int ih)
-{
- struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);
-
- if (WARN_ON(!pages))
- return;
-
- qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
- if (info->ats_enabled) {
- qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
- svm->pasid, sdev->qdep, address,
- order_base_2(pages));
- quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
- svm->pasid, sdev->qdep);
- }
-}
-
-static void intel_flush_svm_range_dev(struct intel_svm *svm,
- struct intel_svm_dev *sdev,
- unsigned long address,
- unsigned long pages, int ih)
-{
- unsigned long shift = ilog2(__roundup_pow_of_two(pages));
- unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
- unsigned long start = ALIGN_DOWN(address, align);
- unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);
-
- while (start < end) {
- __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
- start += align;
- }
-}
-
-static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
- unsigned long pages, int ih)
-{
- struct intel_svm_dev *sdev;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list)
- intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
- rcu_read_unlock();
-}
-
-static void intel_flush_svm_all(struct intel_svm *svm)
-{
- struct device_domain_info *info;
- struct intel_svm_dev *sdev;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list) {
- info = dev_iommu_priv_get(sdev->dev);
-
- qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
- if (info->ats_enabled) {
- qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
- svm->pasid, sdev->qdep,
- 0, 64 - VTD_PAGE_SHIFT);
- quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
- svm->pasid, sdev->qdep);
- }
- }
- rcu_read_unlock();
-}
-
/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
- struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+ struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
- if (start == 0 && end == -1UL) {
- intel_flush_svm_all(svm);
+ if (start == 0 && end == ULONG_MAX) {
+ cache_tag_flush_all(domain);
return;
}
- intel_flush_svm_range(svm, start,
- (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
+ /*
+ * The mm subsystem defines the end of a range as the first byte after
+ * the last address (exclusive), whereas the IOMMU subsystem uses the
+ * last address of the range (inclusive).
+ */
+ cache_tag_flush_range(domain, start, end - 1, 0);
}
static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
- struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
- struct intel_svm_dev *sdev;
+ struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
+ struct dev_pasid_info *dev_pasid;
+ struct device_domain_info *info;
+ unsigned long flags;
/* This might end up being called from exit_mmap(), *before* the page
* tables are cleared. And __mmu_notifier_release() will delete us from
@@ -269,157 +172,78 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
* page) so that we end up taking a fault that the hardware really
* *has* to handle gracefully without affecting other processes.
*/
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list)
- intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
- svm->pasid, true);
- rcu_read_unlock();
+ spin_lock_irqsave(&domain->lock, flags);
+ list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
+ info = dev_iommu_priv_get(dev_pasid->dev);
+ intel_pasid_tear_down_entry(info->iommu, dev_pasid->dev,
+ dev_pasid->pasid, true);
+ }
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+}
+static void intel_mm_free_notifier(struct mmu_notifier *mn)
+{
+ kfree(container_of(mn, struct dmar_domain, notifier));
}
static const struct mmu_notifier_ops intel_mmuops = {
.release = intel_mm_release,
.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
+ .free_notifier = intel_mm_free_notifier,
};
-static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
- struct intel_svm **rsvm,
- struct intel_svm_dev **rsdev)
-{
- struct intel_svm_dev *sdev = NULL;
- struct intel_svm *svm;
-
- if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
- return -EINVAL;
-
- svm = pasid_private_find(pasid);
- if (IS_ERR(svm))
- return PTR_ERR(svm);
-
- if (!svm)
- goto out;
-
- /*
- * If we found svm for the PASID, there must be at least one device
- * bond.
- */
- if (WARN_ON(list_empty(&svm->devs)))
- return -EINVAL;
- sdev = svm_lookup_device_by_dev(svm, dev);
-
-out:
- *rsvm = svm;
- *rsdev = sdev;
-
- return 0;
-}
-
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
struct mm_struct *mm = domain->mm;
- struct intel_svm_dev *sdev;
- struct intel_svm *svm;
+ struct dev_pasid_info *dev_pasid;
unsigned long sflags;
+ unsigned long flags;
int ret = 0;
- svm = pasid_private_find(pasid);
- if (!svm) {
- svm = kzalloc(sizeof(*svm), GFP_KERNEL);
- if (!svm)
- return -ENOMEM;
-
- svm->pasid = pasid;
- svm->mm = mm;
- INIT_LIST_HEAD_RCU(&svm->devs);
-
- svm->notifier.ops = &intel_mmuops;
- ret = mmu_notifier_register(&svm->notifier, mm);
- if (ret) {
- kfree(svm);
- return ret;
- }
-
- ret = pasid_private_add(svm->pasid, svm);
- if (ret) {
- mmu_notifier_unregister(&svm->notifier, mm);
- kfree(svm);
- return ret;
- }
- }
+ dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
+ if (!dev_pasid)
+ return -ENOMEM;
- sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
- if (!sdev) {
- ret = -ENOMEM;
- goto free_svm;
- }
+ dev_pasid->dev = dev;
+ dev_pasid->pasid = pasid;
- sdev->dev = dev;
- sdev->iommu = iommu;
- sdev->did = FLPT_DEFAULT_DID;
- sdev->sid = PCI_DEVID(info->bus, info->devfn);
- if (info->ats_enabled) {
- sdev->qdep = info->ats_qdep;
- if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
- sdev->qdep = 0;
- }
+ ret = cache_tag_assign_domain(to_dmar_domain(domain), dev, pasid);
+ if (ret)
+ goto free_dev_pasid;
/* Setup the pasid table: */
sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
FLPT_DEFAULT_DID, sflags);
if (ret)
- goto free_sdev;
+ goto unassign_tag;
- list_add_rcu(&sdev->list, &svm->devs);
+ spin_lock_irqsave(&dmar_domain->lock, flags);
+ list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
return 0;
-free_sdev:
- kfree(sdev);
-free_svm:
- if (list_empty(&svm->devs)) {
- mmu_notifier_unregister(&svm->notifier, mm);
- pasid_private_remove(pasid);
- kfree(svm);
- }
+unassign_tag:
+ cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid);
+free_dev_pasid:
+ kfree(dev_pasid);
return ret;
}
-void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
-{
- struct intel_svm_dev *sdev;
- struct intel_svm *svm;
- struct mm_struct *mm;
-
- if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
- return;
- mm = svm->mm;
-
- if (sdev) {
- list_del_rcu(&sdev->list);
- kfree_rcu(sdev, rcu);
-
- if (list_empty(&svm->devs)) {
- if (svm->notifier.ops)
- mmu_notifier_unregister(&svm->notifier, mm);
- pasid_private_remove(svm->pasid);
- kfree(svm);
- }
- }
-}
-
/* Page request queue descriptor */
struct page_req_dsc {
union {
struct {
u64 type:8;
u64 pasid_present:1;
- u64 priv_data_present:1;
- u64 rsvd:6;
+ u64 rsvd:7;
u64 rid:16;
u64 pasid:20;
u64 exe_req:1;
@@ -438,7 +262,8 @@ struct page_req_dsc {
};
u64 qw_1;
};
- u64 priv_data[2];
+ u64 qw_2;
+ u64 qw_3;
};
static bool is_canonical_address(u64 addr)
@@ -572,24 +397,6 @@ static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
}
- if (desc->priv_data_present) {
- /*
- * Set last page in group bit if private data is present,
- * page response is required as it does for LPIG.
- * iommu_report_device_fault() doesn't understand this vendor
- * specific requirement thus we set last_page as a workaround.
- */
- event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
- event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
- event.fault.prm.private_data[0] = desc->priv_data[0];
- event.fault.prm.private_data[1] = desc->priv_data[1];
- } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
- /*
- * If the private data fields are not used by hardware, use it
- * to monitor the prq handle latency.
- */
- event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
- }
iommu_report_device_fault(dev, &event);
}
@@ -597,39 +404,23 @@ static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
static void handle_bad_prq_event(struct intel_iommu *iommu,
struct page_req_dsc *req, int result)
{
- struct qi_desc desc;
+ struct qi_desc desc = { };
pr_err("%s: Invalid page request: %08llx %08llx\n",
iommu->name, ((unsigned long long *)req)[0],
((unsigned long long *)req)[1]);
- /*
- * Per VT-d spec. v3.0 ch7.7, system software must
- * respond with page group response if private data
- * is present (PDP) or last page in group (LPIG) bit
- * is set. This is an additional VT-d feature beyond
- * PCI ATS spec.
- */
- if (!req->lpig && !req->priv_data_present)
+ if (!req->lpig)
return;
desc.qw0 = QI_PGRP_PASID(req->pasid) |
QI_PGRP_DID(req->rid) |
QI_PGRP_PASID_P(req->pasid_present) |
- QI_PGRP_PDP(req->priv_data_present) |
QI_PGRP_RESP_CODE(result) |
QI_PGRP_RESP_TYPE;
desc.qw1 = QI_PGRP_IDX(req->prg_index) |
QI_PGRP_LPIG(req->lpig);
- if (req->priv_data_present) {
- desc.qw2 = req->priv_data[0];
- desc.qw3 = req->priv_data[1];
- } else {
- desc.qw2 = 0;
- desc.qw3 = 0;
- }
-
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -697,7 +488,7 @@ bad_req:
intel_svm_prq_report(iommu, dev, req);
trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
- req->priv_data[0], req->priv_data[1],
+ req->qw_2, req->qw_3,
iommu->prq_seq_number++);
mutex_unlock(&iommu->iopf_lock);
prq_advance:
@@ -736,7 +527,7 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
struct intel_iommu *iommu = info->iommu;
u8 bus = info->bus, devfn = info->devfn;
struct iommu_fault_page_request *prm;
- bool private_present;
+ struct qi_desc desc;
bool pasid_present;
bool last_page;
u16 sid;
@@ -744,42 +535,25 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
prm = &evt->fault.prm;
sid = PCI_DEVID(bus, devfn);
pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
- /*
- * Per VT-d spec. v3.0 ch7.7, system software must respond
- * with page group response if private data is present (PDP)
- * or last page in group (LPIG) bit is set. This is an
- * additional VT-d requirement beyond PCI ATS spec.
- */
- if (last_page || private_present) {
- struct qi_desc desc;
-
- desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
- QI_PGRP_PASID_P(pasid_present) |
- QI_PGRP_PDP(private_present) |
- QI_PGRP_RESP_CODE(msg->code) |
- QI_PGRP_RESP_TYPE;
- desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
- desc.qw2 = 0;
- desc.qw3 = 0;
-
- if (private_present) {
- desc.qw2 = prm->private_data[0];
- desc.qw3 = prm->private_data[1];
- } else if (prm->private_data[0]) {
- dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
- ktime_to_ns(ktime_get()) - prm->private_data[0]);
- }
+ desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+ QI_PGRP_PASID_P(pasid_present) |
+ QI_PGRP_RESP_CODE(msg->code) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
- qi_submit_sync(iommu, &desc, 1, 0);
- }
+ qi_submit_sync(iommu, &desc, 1, 0);
}
static void intel_svm_domain_free(struct iommu_domain *domain)
{
- kfree(to_dmar_domain(domain));
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+ /* dmar_domain free is deferred to the mmu free_notifier callback. */
+ mmu_notifier_put(&dmar_domain->notifier);
}
static const struct iommu_domain_ops intel_svm_domain_ops = {
@@ -787,14 +561,29 @@ static const struct iommu_domain_ops intel_svm_domain_ops = {
.free = intel_svm_domain_free
};
-struct iommu_domain *intel_svm_domain_alloc(void)
+struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
+ struct mm_struct *mm)
{
struct dmar_domain *domain;
+ int ret;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
- return NULL;
+ return ERR_PTR(-ENOMEM);
+
domain->domain.ops = &intel_svm_domain_ops;
+ domain->use_first_level = true;
+ INIT_LIST_HEAD(&domain->dev_pasids);
+ INIT_LIST_HEAD(&domain->cache_tags);
+ spin_lock_init(&domain->cache_lock);
+ spin_lock_init(&domain->lock);
+
+ domain->notifier.ops = &intel_mmuops;
+ ret = mmu_notifier_register(&domain->notifier, mm);
+ if (ret) {
+ kfree(domain);
+ return ERR_PTR(ret);
+ }
return &domain->domain;
}
diff --git a/drivers/iommu/intel/trace.h b/drivers/iommu/intel/trace.h
index 93d96f93a89b..9defdae6ebae 100644
--- a/drivers/iommu/intel/trace.h
+++ b/drivers/iommu/intel/trace.h
@@ -32,7 +32,7 @@ TRACE_EVENT(qi_submit,
),
TP_fast_assign(
- __assign_str(iommu, iommu->name);
+ __assign_str(iommu);
__entry->qw0 = qw0;
__entry->qw1 = qw1;
__entry->qw2 = qw2;
@@ -79,8 +79,8 @@ TRACE_EVENT(prq_report,
__entry->dw2 = dw2;
__entry->dw3 = dw3;
__entry->seq = seq;
- __assign_str(iommu, iommu->name);
- __assign_str(dev, dev_name(dev));
+ __assign_str(iommu);
+ __assign_str(dev);
),
TP_printk("%s/%s seq# %ld: %s",
@@ -89,6 +89,103 @@ TRACE_EVENT(prq_report,
__entry->dw1, __entry->dw2, __entry->dw3)
)
);
+
+DECLARE_EVENT_CLASS(cache_tag_log,
+ TP_PROTO(struct cache_tag *tag),
+ TP_ARGS(tag),
+ TP_STRUCT__entry(
+ __string(iommu, tag->iommu->name)
+ __string(dev, dev_name(tag->dev))
+ __field(u16, type)
+ __field(u16, domain_id)
+ __field(u32, pasid)
+ __field(u32, users)
+ ),
+ TP_fast_assign(
+ __assign_str(iommu);
+ __assign_str(dev);
+ __entry->type = tag->type;
+ __entry->domain_id = tag->domain_id;
+ __entry->pasid = tag->pasid;
+ __entry->users = tag->users;
+ ),
+ TP_printk("%s/%s type %s did %d pasid %d ref %d",
+ __get_str(iommu), __get_str(dev),
+ __print_symbolic(__entry->type,
+ { CACHE_TAG_IOTLB, "iotlb" },
+ { CACHE_TAG_DEVTLB, "devtlb" },
+ { CACHE_TAG_NESTING_IOTLB, "nesting_iotlb" },
+ { CACHE_TAG_NESTING_DEVTLB, "nesting_devtlb" }),
+ __entry->domain_id, __entry->pasid, __entry->users
+ )
+);
+
+DEFINE_EVENT(cache_tag_log, cache_tag_assign,
+ TP_PROTO(struct cache_tag *tag),
+ TP_ARGS(tag)
+);
+
+DEFINE_EVENT(cache_tag_log, cache_tag_unassign,
+ TP_PROTO(struct cache_tag *tag),
+ TP_ARGS(tag)
+);
+
+DEFINE_EVENT(cache_tag_log, cache_tag_flush_all,
+ TP_PROTO(struct cache_tag *tag),
+ TP_ARGS(tag)
+);
+
+DECLARE_EVENT_CLASS(cache_tag_flush,
+ TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,
+ unsigned long addr, unsigned long pages, unsigned long mask),
+ TP_ARGS(tag, start, end, addr, pages, mask),
+ TP_STRUCT__entry(
+ __string(iommu, tag->iommu->name)
+ __string(dev, dev_name(tag->dev))
+ __field(u16, type)
+ __field(u16, domain_id)
+ __field(u32, pasid)
+ __field(unsigned long, start)
+ __field(unsigned long, end)
+ __field(unsigned long, addr)
+ __field(unsigned long, pages)
+ __field(unsigned long, mask)
+ ),
+ TP_fast_assign(
+ __assign_str(iommu);
+ __assign_str(dev);
+ __entry->type = tag->type;
+ __entry->domain_id = tag->domain_id;
+ __entry->pasid = tag->pasid;
+ __entry->start = start;
+ __entry->end = end;
+ __entry->addr = addr;
+ __entry->pages = pages;
+ __entry->mask = mask;
+ ),
+ TP_printk("%s %s[%d] type %s did %d [0x%lx-0x%lx] addr 0x%lx pages 0x%lx mask 0x%lx",
+ __get_str(iommu), __get_str(dev), __entry->pasid,
+ __print_symbolic(__entry->type,
+ { CACHE_TAG_IOTLB, "iotlb" },
+ { CACHE_TAG_DEVTLB, "devtlb" },
+ { CACHE_TAG_NESTING_IOTLB, "nesting_iotlb" },
+ { CACHE_TAG_NESTING_DEVTLB, "nesting_devtlb" }),
+ __entry->domain_id, __entry->start, __entry->end,
+ __entry->addr, __entry->pages, __entry->mask
+ )
+);
+
+DEFINE_EVENT(cache_tag_flush, cache_tag_flush_range,
+ TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,
+ unsigned long addr, unsigned long pages, unsigned long mask),
+ TP_ARGS(tag, start, end, addr, pages, mask)
+);
+
+DEFINE_EVENT(cache_tag_flush, cache_tag_flush_range_np,
+ TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,
+ unsigned long addr, unsigned long pages, unsigned long mask),
+ TP_ARGS(tag, start, end, addr, pages, mask)
+);
#endif /* _TRACE_INTEL_IOMMU_H */
/* This part must be outside protection */