Diffstat (limited to 'drivers/iommu')
-rw-r--r--  drivers/iommu/Kconfig | 4
-rw-r--r--  drivers/iommu/amd/amd_iommu.h | 8
-rw-r--r--  drivers/iommu/amd/amd_iommu_types.h | 30
-rw-r--r--  drivers/iommu/amd/init.c | 65
-rw-r--r--  drivers/iommu/amd/io_pgtable.c | 7
-rw-r--r--  drivers/iommu/amd/io_pgtable_v2.c | 2
-rw-r--r--  drivers/iommu/amd/iommu.c | 106
-rw-r--r--  drivers/iommu/amd/pasid.c | 2
-rw-r--r--  drivers/iommu/apple-dart.c | 22
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 60
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 80
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 36
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 32
-rw-r--r--  drivers/iommu/arm/arm-smmu/arm-smmu.c | 11
-rw-r--r--  drivers/iommu/dma-iommu.c | 256
-rw-r--r--  drivers/iommu/dma-iommu.h | 14
-rw-r--r--  drivers/iommu/exynos-iommu.c | 4
-rw-r--r--  drivers/iommu/hyperv-iommu.c | 8
-rw-r--r--  drivers/iommu/intel/iommu.c | 243
-rw-r--r--  drivers/iommu/intel/iommu.h | 28
-rw-r--r--  drivers/iommu/intel/irq_remapping.c | 71
-rw-r--r--  drivers/iommu/intel/nested.c | 2
-rw-r--r--  drivers/iommu/intel/pasid.c | 43
-rw-r--r--  drivers/iommu/intel/prq.c | 2
-rw-r--r--  drivers/iommu/intel/svm.c | 43
-rw-r--r--  drivers/iommu/io-pgtable-dart.c | 2
-rw-r--r--  drivers/iommu/iommu-priv.h | 21
-rw-r--r--  drivers/iommu/iommu-sva.c | 1
-rw-r--r--  drivers/iommu/iommu.c | 382
-rw-r--r--  drivers/iommu/iommufd/Kconfig | 2
-rw-r--r--  drivers/iommu/iommufd/Makefile | 2
-rw-r--r--  drivers/iommu/iommufd/device.c | 503
-rw-r--r--  drivers/iommu/iommufd/driver.c | 198
-rw-r--r--  drivers/iommu/iommufd/eventq.c | 598
-rw-r--r--  drivers/iommu/iommufd/fault.c | 462
-rw-r--r--  drivers/iommu/iommufd/hw_pagetable.c | 39
-rw-r--r--  drivers/iommu/iommufd/iommufd_private.h | 196
-rw-r--r--  drivers/iommu/iommufd/iommufd_test.h | 40
-rw-r--r--  drivers/iommu/iommufd/main.c | 16
-rw-r--r--  drivers/iommu/iommufd/selftest.c | 297
-rw-r--r--  drivers/iommu/iommufd/viommu.c | 2
-rw-r--r--  drivers/iommu/ipmmu-vmsa.c | 27
-rw-r--r--  drivers/iommu/mtk_iommu.c | 26
-rw-r--r--  drivers/iommu/mtk_iommu_v1.c | 25
-rw-r--r--  drivers/iommu/of_iommu.c | 13
-rw-r--r--  drivers/iommu/rockchip-iommu.c | 61
-rw-r--r--  drivers/iommu/s390-iommu.c | 138
-rw-r--r--  drivers/iommu/tegra-smmu.c | 1
48 files changed, 2808 insertions(+), 1423 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index ec1b5e32b972..cd750f512dee 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -154,7 +154,6 @@ config IOMMU_DMA
select DMA_OPS_HELPERS
select IOMMU_API
select IOMMU_IOVA
- select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH
select NEED_SG_DMA_FLAGS if SWIOTLB
@@ -483,8 +482,7 @@ config MTK_IOMMU
config MTK_IOMMU_V1
tristate "MediaTek IOMMU Version 1 (M4U gen1) Support"
- depends on ARM
- depends on ARCH_MEDIATEK || COMPILE_TEST
+ depends on (ARCH_MEDIATEK && ARM) || COMPILE_TEST
select ARM_DMA_USE_IOMMU
select IOMMU_API
select MEMORY
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 68debf5ee2d7..220c598b7e14 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -47,7 +47,6 @@ extern unsigned long amd_iommu_pgsize_bitmap;
/* Protection domain ops */
void amd_iommu_init_identity_domain(void);
struct protection_domain *protection_domain_alloc(void);
-void protection_domain_free(struct protection_domain *domain);
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);
void amd_iommu_domain_free(struct iommu_domain *dom);
@@ -176,12 +175,11 @@ void amd_iommu_apply_ivrs_quirks(void);
#else
static inline void amd_iommu_apply_ivrs_quirks(void) { }
#endif
+struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid);
void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
u64 *root, int mode);
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
-
-#endif
-
-struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid);
struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid);
+
+#endif /* AMD_IOMMU_H */
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 23caea22f8dc..5089b58e528a 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -112,6 +112,10 @@
#define FEATURE_SNPAVICSUP_GAM(x) \
(FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1)
+#define FEATURE_NUM_INT_REMAP_SUP GENMASK_ULL(9, 8)
+#define FEATURE_NUM_INT_REMAP_SUP_2K(x) \
+ (FIELD_GET(FEATURE_NUM_INT_REMAP_SUP, x) == 0x1)
+
/* Note:
* The current driver only support 16-bit PASID.
* Currently, hardware only implement upto 16-bit PASID
@@ -175,13 +179,16 @@
#define CONTROL_GAM_EN 25
#define CONTROL_GALOG_EN 28
#define CONTROL_GAINT_EN 29
+#define CONTROL_NUM_INT_REMAP_MODE 43
+#define CONTROL_NUM_INT_REMAP_MODE_MASK 0x03
+#define CONTROL_NUM_INT_REMAP_MODE_2K 0x01
#define CONTROL_EPH_EN 45
#define CONTROL_XT_EN 50
#define CONTROL_INTCAPXT_EN 51
#define CONTROL_IRTCACHEDIS 59
#define CONTROL_SNPAVIC_EN 61
-#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
+#define CTRL_INV_TO_MASK 7
#define CTRL_INV_TO_NONE 0
#define CTRL_INV_TO_1MS 1
#define CTRL_INV_TO_10MS 2
@@ -309,15 +316,13 @@
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
#define DTE_IRQ_REMAP_ENABLE 1ULL
-/*
- * AMD IOMMU hardware only support 512 IRTEs despite
- * the architectural limitation of 2048 entries.
- */
-#define DTE_INTTAB_ALIGNMENT 128
-#define DTE_INTTABLEN_VALUE 9ULL
-#define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1)
#define DTE_INTTABLEN_MASK (0xfULL << 1)
-#define MAX_IRQS_PER_TABLE (1 << DTE_INTTABLEN_VALUE)
+#define DTE_INTTABLEN_VALUE_512 9ULL
+#define DTE_INTTABLEN_512 (DTE_INTTABLEN_VALUE_512 << 1)
+#define MAX_IRQS_PER_TABLE_512 BIT(DTE_INTTABLEN_VALUE_512)
+#define DTE_INTTABLEN_VALUE_2K 11ULL
+#define DTE_INTTABLEN_2K (DTE_INTTABLEN_VALUE_2K << 1)
+#define MAX_IRQS_PER_TABLE_2K BIT(DTE_INTTABLEN_VALUE_2K)
#define PAGE_MODE_NONE 0x00
#define PAGE_MODE_1_LEVEL 0x01
@@ -492,9 +497,6 @@ extern const struct iommu_ops amd_iommu_ops;
/* IVRS indicates that pre-boot remapping was enabled */
extern bool amdr_ivrs_remap_support;
-/* kmem_cache to get tables with 128 byte alignement */
-extern struct kmem_cache *amd_iommu_irq_cache;
-
#define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff)
#define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff)
#define PCI_SEG_DEVID_TO_SBDF(seg, devid) ((((u32)(seg) & 0xffff) << 16) | \
@@ -851,6 +853,7 @@ struct iommu_dev_data {
struct device *dev;
u16 devid; /* PCI Device ID */
+ unsigned int max_irqs; /* Maximum IRQs supported by device */
u32 max_pasids; /* Max supported PASIDs */
u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */
int ats_qdep;
@@ -928,9 +931,6 @@ struct unity_map_entry {
* Data structures for device handling
*/
-/* size of the dma_ops aperture as power of 2 */
-extern unsigned amd_iommu_aperture_order;
-
extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index cb536d372b12..dd9e26b7b718 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -12,7 +12,6 @@
#include <linux/acpi.h>
#include <linux/list.h>
#include <linux/bitmap.h>
-#include <linux/slab.h>
#include <linux/syscore_ops.h>
#include <linux/interrupt.h>
#include <linux/msi.h>
@@ -219,7 +218,6 @@ static bool __initdata cmdline_maps;
static enum iommu_init_state init_state = IOMMU_START_STATE;
static int amd_iommu_enable_interrupts(void);
-static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
static bool amd_iommu_pre_enabled = true;
@@ -412,33 +410,26 @@ static void iommu_set_device_table(struct amd_iommu *iommu)
&entry, sizeof(entry));
}
-/* Generic functions to enable/disable certain features of the IOMMU. */
-void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
+static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift)
{
u64 ctrl;
ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
- ctrl |= (1ULL << bit);
+ mask <<= shift;
+ ctrl &= ~mask;
+ ctrl |= (val << shift) & mask;
writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
-static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
+/* Generic functions to enable/disable certain features of the IOMMU. */
+void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
{
- u64 ctrl;
-
- ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
- ctrl &= ~(1ULL << bit);
- writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ iommu_feature_set(iommu, 1ULL, 1ULL, bit);
}
-static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
+static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
{
- u64 ctrl;
-
- ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
- ctrl &= ~CTRL_INV_TO_MASK;
- ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
- writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ iommu_feature_set(iommu, 0ULL, 1ULL, bit);
}
/* Function to enable the hardware */
@@ -1069,7 +1060,8 @@ static bool __copy_device_table(struct amd_iommu *iommu)
int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
if (irq_v && (int_ctl || int_tab_len)) {
if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
- (int_tab_len != DTE_INTTABLEN)) {
+ (int_tab_len != DTE_INTTABLEN_512 &&
+ int_tab_len != DTE_INTTABLEN_2K)) {
pr_err("Wrong old irq remapping flag: %#x\n", devid);
memunmap(old_devtb);
return false;
@@ -2652,7 +2644,7 @@ static void iommu_init_flags(struct amd_iommu *iommu)
iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
/* Set IOTLB invalidation timeout to 1s */
- iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
+ iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT);
/* Enable Enhanced Peripheral Page Request Handling */
if (check_feature(FEATURE_EPHSUP))
@@ -2745,6 +2737,17 @@ static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
iommu->irtcachedis_enabled ? "disabled" : "enabled");
}
+static void iommu_enable_2k_int(struct amd_iommu *iommu)
+{
+ if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
+ return;
+
+ iommu_feature_set(iommu,
+ CONTROL_NUM_INT_REMAP_MODE_2K,
+ CONTROL_NUM_INT_REMAP_MODE_MASK,
+ CONTROL_NUM_INT_REMAP_MODE);
+}
+
static void early_enable_iommu(struct amd_iommu *iommu)
{
iommu_disable(iommu);
@@ -2757,6 +2760,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
+ iommu_enable_2k_int(iommu);
iommu_enable(iommu);
amd_iommu_flush_all_caches(iommu);
}
@@ -2813,6 +2817,7 @@ static void early_enable_iommus(void)
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
+ iommu_enable_2k_int(iommu);
iommu_set_device_table(iommu);
amd_iommu_flush_all_caches(iommu);
}
@@ -2939,9 +2944,6 @@ static struct syscore_ops amd_iommu_syscore_ops = {
static void __init free_iommu_resources(void)
{
- kmem_cache_destroy(amd_iommu_irq_cache);
- amd_iommu_irq_cache = NULL;
-
free_iommu_all();
free_pci_segments();
}
@@ -3040,7 +3042,7 @@ static void __init ivinfo_init(void *ivrs)
static int __init early_amd_iommu_init(void)
{
struct acpi_table_header *ivrs_base;
- int remap_cache_sz, ret;
+ int ret;
acpi_status status;
if (!amd_iommu_detected)
@@ -3102,22 +3104,7 @@ static int __init early_amd_iommu_init(void)
if (amd_iommu_irq_remap) {
struct amd_iommu_pci_seg *pci_seg;
- /*
- * Interrupt remapping enabled, create kmem_cache for the
- * remapping tables.
- */
ret = -ENOMEM;
- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
- else
- remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
- amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
- remap_cache_sz,
- DTE_INTTAB_ALIGNMENT,
- 0, NULL);
- if (!amd_iommu_irq_cache)
- goto out;
-
for_each_pci_segment(pci_seg) {
if (alloc_irq_lookup_table(pci_seg))
goto out;
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index f3399087859f..26cf562dde11 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -47,13 +47,6 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
return fpte;
}
-/****************************************************************************
- *
- * The functions below are used the create the page table mappings for
- * unity mapped regions.
- *
- ****************************************************************************/
-
static void free_pt_page(u64 *pt, struct list_head *freelist)
{
struct page *p = virt_to_page(pt);
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index c616de2c5926..a56a27396305 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -254,7 +254,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd,
iova, map_size, gfp, &updated);
if (!pte) {
- ret = -EINVAL;
+ ret = -ENOMEM;
goto out;
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index cd5116d8c3b2..f34209b08b4c 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -75,8 +75,6 @@ struct iommu_cmd {
*/
DEFINE_IDA(pdom_ids);
-struct kmem_cache *amd_iommu_irq_cache;
-
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev);
@@ -868,7 +866,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
int type, devid, flags, tag;
volatile u32 *event = __evt;
int count = 0;
- u64 address;
+ u64 address, ctrl;
u32 pasid;
retry:
@@ -878,6 +876,7 @@ retry:
(event[1] & EVENT_DOMID_MASK_LO);
flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
address = (u64)(((u64)event[3]) << 32) | event[2];
+ ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
if (type == 0) {
/* Did we hit the erratum? */
@@ -899,6 +898,7 @@ retry:
dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
+ dev_err(dev, "Control Reg : 0x%llx\n", ctrl);
dump_dte_entry(iommu, devid);
break;
case EVENT_TYPE_DEV_TAB_ERR:
@@ -2394,8 +2394,14 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
}
out_err:
+
iommu_completion_wait(iommu);
+ if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
+ dev_data->max_irqs = MAX_IRQS_PER_TABLE_2K;
+ else
+ dev_data->max_irqs = MAX_IRQS_PER_TABLE_512;
+
if (dev_is_pci(dev))
pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);
@@ -2432,15 +2438,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
*
*****************************************************************************/
-void protection_domain_free(struct protection_domain *domain)
-{
- WARN_ON(!list_empty(&domain->dev_list));
- if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
- free_io_pgtable_ops(&domain->iop.pgtbl.ops);
- pdom_id_free(domain->id);
- kfree(domain);
-}
-
static void protection_domain_init(struct protection_domain *domain)
{
spin_lock_init(&domain->lock);
@@ -2578,7 +2575,11 @@ void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain = to_pdomain(dom);
- protection_domain_free(domain);
+ WARN_ON(!list_empty(&domain->dev_list));
+ if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
+ free_io_pgtable_ops(&domain->iop.pgtbl.ops);
+ pdom_id_free(domain->id);
+ kfree(domain);
}
static int blocked_domain_attach_device(struct iommu_domain *domain,
@@ -3081,6 +3082,13 @@ out:
raw_spin_unlock_irqrestore(&iommu->lock, flags);
}
+static inline u8 iommu_get_int_tablen(struct iommu_dev_data *dev_data)
+{
+ if (dev_data && dev_data->max_irqs == MAX_IRQS_PER_TABLE_2K)
+ return DTE_INTTABLEN_2K;
+ return DTE_INTTABLEN_512;
+}
+
static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
@@ -3095,7 +3103,7 @@ static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
new &= ~DTE_IRQ_PHYS_ADDR_MASK;
new |= iommu_virt_to_phys(table->table);
new |= DTE_IRQ_REMAP_INTCTL;
- new |= DTE_INTTABLEN;
+ new |= iommu_get_int_tablen(dev_data);
new |= DTE_IRQ_REMAP_ENABLE;
WRITE_ONCE(dte->data[2], new);
@@ -3121,7 +3129,7 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
return table;
}
-static struct irq_remap_table *__alloc_irq_table(void)
+static struct irq_remap_table *__alloc_irq_table(int nid, int order)
{
struct irq_remap_table *table;
@@ -3129,19 +3137,13 @@ static struct irq_remap_table *__alloc_irq_table(void)
if (!table)
return NULL;
- table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
+ table->table = iommu_alloc_pages_node(nid, GFP_KERNEL, order);
if (!table->table) {
kfree(table);
return NULL;
}
raw_spin_lock_init(&table->lock);
- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- memset(table->table, 0,
- MAX_IRQS_PER_TABLE * sizeof(u32));
- else
- memset(table->table, 0,
- (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
return table;
}
@@ -3173,13 +3175,24 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias,
return 0;
}
+static inline size_t get_irq_table_size(unsigned int max_irqs)
+{
+ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ return max_irqs * sizeof(u32);
+
+ return max_irqs * (sizeof(u64) * 2);
+}
+
static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
- u16 devid, struct pci_dev *pdev)
+ u16 devid, struct pci_dev *pdev,
+ unsigned int max_irqs)
{
struct irq_remap_table *table = NULL;
struct irq_remap_table *new_table = NULL;
struct amd_iommu_pci_seg *pci_seg;
unsigned long flags;
+ int order = get_order(get_irq_table_size(max_irqs));
+ int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
u16 alias;
spin_lock_irqsave(&iommu_table_lock, flags);
@@ -3198,7 +3211,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
spin_unlock_irqrestore(&iommu_table_lock, flags);
/* Nothing there yet, allocate new irq remapping table */
- new_table = __alloc_irq_table();
+ new_table = __alloc_irq_table(nid, order);
if (!new_table)
return NULL;
@@ -3233,20 +3246,21 @@ out_unlock:
spin_unlock_irqrestore(&iommu_table_lock, flags);
if (new_table) {
- kmem_cache_free(amd_iommu_irq_cache, new_table->table);
+ iommu_free_pages(new_table->table, order);
kfree(new_table);
}
return table;
}
static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count,
- bool align, struct pci_dev *pdev)
+ bool align, struct pci_dev *pdev,
+ unsigned long max_irqs)
{
struct irq_remap_table *table;
int index, c, alignment = 1;
unsigned long flags;
- table = alloc_irq_table(iommu, devid, pdev);
+ table = alloc_irq_table(iommu, devid, pdev, max_irqs);
if (!table)
return -ENODEV;
@@ -3257,7 +3271,7 @@ static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count,
/* Scan table for free entries */
for (index = ALIGN(table->min_index, alignment), c = 0;
- index < MAX_IRQS_PER_TABLE;) {
+ index < max_irqs;) {
if (!iommu->irte_ops->is_allocated(table, index)) {
c += 1;
} else {
@@ -3527,6 +3541,14 @@ static void fill_msi_msg(struct msi_msg *msg, u32 index)
msg->data = index;
msg->address_lo = 0;
msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
+ /*
+ * The struct msi_msg.dest_mode_logical is used to set the DM bit
+ * in MSI Message Address Register. For device w/ 2K int-remap support,
+ * this is bit must be set to 1 regardless of the actual destination
+ * mode, which is signified by the IRTE[DM].
+ */
+ if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
+ msg->arch_addr_lo.dest_mode_logical = true;
msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
}
@@ -3589,6 +3611,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
struct amd_ir_data *data = NULL;
struct amd_iommu *iommu;
struct irq_cfg *cfg;
+ struct iommu_dev_data *dev_data;
+ unsigned long max_irqs;
int i, ret, devid, seg, sbdf;
int index;
@@ -3607,6 +3631,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
if (!iommu)
return -EINVAL;
+ dev_data = search_dev_data(iommu, devid);
+ max_irqs = dev_data ? dev_data->max_irqs : MAX_IRQS_PER_TABLE_512;
+
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret < 0)
return ret;
@@ -3614,7 +3641,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
struct irq_remap_table *table;
- table = alloc_irq_table(iommu, devid, NULL);
+ table = alloc_irq_table(iommu, devid, NULL, max_irqs);
if (table) {
if (!table->min_index) {
/*
@@ -3635,9 +3662,11 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI);
index = alloc_irq_index(iommu, devid, nr_irqs, align,
- msi_desc_to_pci_dev(info->desc));
+ msi_desc_to_pci_dev(info->desc),
+ max_irqs);
} else {
- index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL);
+ index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL,
+ max_irqs);
}
if (index < 0) {
@@ -3840,6 +3869,9 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
struct iommu_dev_data *dev_data;
+ if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+ return -EINVAL;
+
if (ir_data->iommu == NULL)
return -EINVAL;
@@ -3850,21 +3882,11 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
* we should not modify the IRTE
*/
if (!dev_data || !dev_data->use_vapic)
- return 0;
+ return -EINVAL;
ir_data->cfg = irqd_cfg(data);
pi_data->ir_data = ir_data;
- /* Note:
- * SVM tries to set up for VAPIC mode, but we are in
- * legacy mode. So, we force legacy mode instead.
- */
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
- pr_debug("%s: Fall back to using intr legacy remap\n",
- __func__);
- pi_data->is_guest_mode = false;
- }
-
pi_data->prev_ga_tag = ir_data->cached_ga_tag;
if (pi_data->is_guest_mode) {
ir_data->ga_root_ptr = (pi_data->base >> 12);
diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c
index 11150cfd6718..77c8e9a91cbc 100644
--- a/drivers/iommu/amd/pasid.c
+++ b/drivers/iommu/amd/pasid.c
@@ -195,7 +195,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
ret = mmu_notifier_register(&pdom->mn, mm);
if (ret) {
- protection_domain_free(pdom);
+ amd_iommu_domain_free(&pdom->domain);
return ERR_PTR(ret);
}
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 95ba3caeb401..e13501541fdd 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -36,7 +36,7 @@
#define DART_MAX_STREAMS 256
#define DART_MAX_TTBR 4
-#define MAX_DARTS_PER_DEVICE 2
+#define MAX_DARTS_PER_DEVICE 3
/* Common registers */
@@ -277,6 +277,9 @@ struct apple_dart_domain {
* @streams: streams for this device
*/
struct apple_dart_master_cfg {
+ /* Intersection of DART capabilities */
+ u32 supports_bypass : 1;
+
struct apple_dart_stream_map stream_maps[MAX_DARTS_PER_DEVICE];
};
@@ -684,7 +687,7 @@ static int apple_dart_attach_dev_identity(struct iommu_domain *domain,
struct apple_dart_stream_map *stream_map;
int i;
- if (!cfg->stream_maps[0].dart->supports_bypass)
+ if (!cfg->supports_bypass)
return -EINVAL;
for_each_stream_map(i, cfg, stream_map)
@@ -792,20 +795,23 @@ static int apple_dart_of_xlate(struct device *dev,
return -EINVAL;
sid = args->args[0];
- if (!cfg)
+ if (!cfg) {
cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
- if (!cfg)
- return -ENOMEM;
+ if (!cfg)
+ return -ENOMEM;
+ /* Will be ANDed with DART capabilities */
+ cfg->supports_bypass = true;
+ }
dev_iommu_priv_set(dev, cfg);
cfg_dart = cfg->stream_maps[0].dart;
if (cfg_dart) {
- if (cfg_dart->supports_bypass != dart->supports_bypass)
- return -EINVAL;
if (cfg_dart->pgsize != dart->pgsize)
return -EINVAL;
}
+ cfg->supports_bypass &= dart->supports_bypass;
+
for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
if (cfg->stream_maps[i].dart == dart) {
set_bit(sid, cfg->stream_maps[i].sidmap);
@@ -945,7 +951,7 @@ static int apple_dart_def_domain_type(struct device *dev)
if (cfg->stream_maps[0].dart->pgsize > PAGE_SIZE)
return IOMMU_DOMAIN_IDENTITY;
- if (!cfg->stream_maps[0].dart->supports_bypass)
+ if (!cfg->supports_bypass)
return IOMMU_DOMAIN_DMA;
return 0;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
index 5aa2e7af58b4..e4fd8d522af8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -43,6 +43,8 @@ static void arm_smmu_make_nested_cd_table_ste(
target->data[0] |= nested_domain->ste[0] &
~cpu_to_le64(STRTAB_STE_0_CFG);
target->data[1] |= nested_domain->ste[1];
+ /* Merge events for DoS mitigations on eventq */
+ target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
}
/*
@@ -85,6 +87,47 @@ static void arm_smmu_make_nested_domain_ste(
}
}
+int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+ struct arm_smmu_nested_domain *nested_domain)
+{
+ struct arm_smmu_vmaster *vmaster;
+ unsigned long vsid;
+ int ret;
+
+ iommu_group_mutex_assert(state->master->dev);
+
+ ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core,
+ state->master->dev, &vsid);
+ if (ret)
+ return ret;
+
+ vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL);
+ if (!vmaster)
+ return -ENOMEM;
+ vmaster->vsmmu = nested_domain->vsmmu;
+ vmaster->vsid = vsid;
+ state->vmaster = vmaster;
+
+ return 0;
+}
+
+void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
+{
+ struct arm_smmu_master *master = state->master;
+
+ mutex_lock(&master->smmu->streams_mutex);
+ kfree(master->vmaster);
+ master->vmaster = state->vmaster;
+ mutex_unlock(&master->smmu->streams_mutex);
+}
+
+void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
+{
+ struct arm_smmu_attach_state state = { .master = master };
+
+ arm_smmu_attach_commit_vmaster(&state);
+}
+
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
struct device *dev)
{
@@ -392,4 +435,21 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
return &vsmmu->core;
}
+int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
+{
+ struct iommu_vevent_arm_smmuv3 vevt;
+ int i;
+
+ lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex);
+
+ vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) |
+ FIELD_PREP(EVTQ_0_SID, vmaster->vsid));
+ for (i = 1; i < EVTQ_ENT_DWORDS; i++)
+ vevt.evt[i] = cpu_to_le64(evt[i]);
+
+ return iommufd_viommu_report_event(&vmaster->vsmmu->core,
+ IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt,
+ sizeof(vevt));
+}
+
MODULE_IMPORT_NS("IOMMUFD");
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 358072b4e293..b4c21aaed126 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1052,7 +1052,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
- STRTAB_STE_1_EATS);
+ STRTAB_STE_1_EATS | STRTAB_STE_1_MEV);
used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
/*
@@ -1068,7 +1068,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
if (cfg & BIT(1)) {
used_bits[1] |=
cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
- STRTAB_STE_1_SHCFG);
+ STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV);
used_bits[2] |=
cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
@@ -1813,8 +1813,8 @@ static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw,
mutex_unlock(&smmu->streams_mutex);
}
-static int arm_smmu_handle_event(struct arm_smmu_device *smmu,
- struct arm_smmu_event *event)
+static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt,
+ struct arm_smmu_event *event)
{
int ret = 0;
u32 perm = 0;
@@ -1823,6 +1823,10 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu,
struct iommu_fault *flt = &fault_evt.fault;
switch (event->id) {
+ case EVT_ID_BAD_STE_CONFIG:
+ case EVT_ID_STREAM_DISABLED_FAULT:
+ case EVT_ID_BAD_SUBSTREAMID_CONFIG:
+ case EVT_ID_BAD_CD_CONFIG:
case EVT_ID_TRANSLATION_FAULT:
case EVT_ID_ADDR_SIZE_FAULT:
case EVT_ID_ACCESS_FAULT:
@@ -1832,31 +1836,30 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu,
return -EOPNOTSUPP;
}
- if (!event->stall)
- return -EOPNOTSUPP;
-
- if (event->read)
- perm |= IOMMU_FAULT_PERM_READ;
- else
- perm |= IOMMU_FAULT_PERM_WRITE;
+ if (event->stall) {
+ if (event->read)
+ perm |= IOMMU_FAULT_PERM_READ;
+ else
+ perm |= IOMMU_FAULT_PERM_WRITE;
- if (event->instruction)
- perm |= IOMMU_FAULT_PERM_EXEC;
+ if (event->instruction)
+ perm |= IOMMU_FAULT_PERM_EXEC;
- if (event->privileged)
- perm |= IOMMU_FAULT_PERM_PRIV;
+ if (event->privileged)
+ perm |= IOMMU_FAULT_PERM_PRIV;
- flt->type = IOMMU_FAULT_PAGE_REQ;
- flt->prm = (struct iommu_fault_page_request) {
- .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
- .grpid = event->stag,
- .perm = perm,
- .addr = event->iova,
- };
+ flt->type = IOMMU_FAULT_PAGE_REQ;
+ flt->prm = (struct iommu_fault_page_request){
+ .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+ .grpid = event->stag,
+ .perm = perm,
+ .addr = event->iova,
+ };
- if (event->ssv) {
- flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- flt->prm.pasid = event->ssid;
+ if (event->ssv) {
+ flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ flt->prm.pasid = event->ssid;
+ }
}
mutex_lock(&smmu->streams_mutex);
@@ -1866,7 +1869,12 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu,
goto out_unlock;
}
- ret = iommu_report_device_fault(master->dev, &fault_evt);
+ if (event->stall)
+ ret = iommu_report_device_fault(master->dev, &fault_evt);
+ else if (master->vmaster && !event->s2)
+ ret = arm_vmaster_report_event(master->vmaster, evt);
+ else
+ ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
out_unlock:
mutex_unlock(&smmu->streams_mutex);
return ret;
@@ -1944,7 +1952,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
do {
while (!queue_remove_raw(q, evt)) {
arm_smmu_decode_event(smmu, evt, &event);
- if (arm_smmu_handle_event(smmu, &event))
+ if (arm_smmu_handle_event(smmu, evt, &event))
arm_smmu_dump_event(smmu, evt, &event, &rs);
put_device(event.dev);
@@ -2803,6 +2811,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
struct arm_smmu_domain *smmu_domain =
to_smmu_domain_devices(new_domain);
unsigned long flags;
+ int ret;
/*
* arm_smmu_share_asid() must not see two domains pointing to the same
@@ -2832,9 +2841,18 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
}
if (smmu_domain) {
+ if (new_domain->type == IOMMU_DOMAIN_NESTED) {
+ ret = arm_smmu_attach_prepare_vmaster(
+ state, to_smmu_nested_domain(new_domain));
+ if (ret)
+ return ret;
+ }
+
master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
- if (!master_domain)
+ if (!master_domain) {
+ kfree(state->vmaster);
return -ENOMEM;
+ }
master_domain->master = master;
master_domain->ssid = state->ssid;
if (new_domain->type == IOMMU_DOMAIN_NESTED)
@@ -2861,6 +2879,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
spin_unlock_irqrestore(&smmu_domain->devices_lock,
flags);
kfree(master_domain);
+ kfree(state->vmaster);
return -EINVAL;
}
@@ -2893,6 +2912,8 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
lockdep_assert_held(&arm_smmu_asid_lock);
+ arm_smmu_attach_commit_vmaster(state);
+
if (state->ats_enabled && !master->ats_enabled) {
arm_smmu_enable_ats(master);
} else if (state->ats_enabled && master->ats_enabled) {
@@ -3162,6 +3183,7 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
struct arm_smmu_ste ste;
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ arm_smmu_master_clear_vmaster(master);
arm_smmu_make_bypass_ste(master->smmu, &ste);
arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
return 0;
@@ -3180,7 +3202,9 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
struct device *dev)
{
struct arm_smmu_ste ste;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ arm_smmu_master_clear_vmaster(master);
arm_smmu_make_abort_ste(&ste);
arm_smmu_attach_dev_ste(domain, dev, &ste,
STRTAB_STE_1_S1DSS_TERMINATE);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index bd9d7c85576a..dd1ad56ce863 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -266,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
#define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
#define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
+#define STRTAB_STE_1_MEV (1UL << 19)
#define STRTAB_STE_1_S2FWB (1UL << 25)
#define STRTAB_STE_1_S1STALLD (1UL << 27)
@@ -799,6 +800,11 @@ struct arm_smmu_stream {
struct rb_node node;
};
+struct arm_smmu_vmaster {
+ struct arm_vsmmu *vsmmu;
+ unsigned long vsid;
+};
+
struct arm_smmu_event {
u8 stall : 1,
ssv : 1,
@@ -824,6 +830,7 @@ struct arm_smmu_master {
struct arm_smmu_device *smmu;
struct device *dev;
struct arm_smmu_stream *streams;
+ struct arm_smmu_vmaster *vmaster; /* use smmu->streams_mutex */
/* Locked by the iommu core using the group mutex */
struct arm_smmu_ctx_desc_cfg cd_table;
unsigned int num_streams;
@@ -972,6 +979,7 @@ struct arm_smmu_attach_state {
bool disable_ats;
ioasid_t ssid;
/* Resulting state */
+ struct arm_smmu_vmaster *vmaster;
bool ats_enabled;
};
@@ -1055,9 +1063,37 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
struct iommu_domain *parent,
struct iommufd_ctx *ictx,
unsigned int viommu_type);
+int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+ struct arm_smmu_nested_domain *nested_domain);
+void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state);
+void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master);
+int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt);
#else
#define arm_smmu_hw_info NULL
#define arm_vsmmu_alloc NULL
+
+static inline int
+arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+ struct arm_smmu_nested_domain *nested_domain)
+{
+ return 0;
+}
+
+static inline void
+arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
+{
+}
+
+static inline void
+arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
+{
+}
+
+static inline int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster,
+ u64 *evt)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */
#endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index d525ab43a4ae..dd7d030d2e89 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -487,17 +487,6 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu)
/* VCMDQ Resource Helpers */
-static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
-{
- struct arm_smmu_queue *q = &vcmdq->cmdq.q;
- size_t nents = 1 << q->llq.max_n_shift;
- size_t qsz = nents << CMDQ_ENT_SZ_SHIFT;
-
- if (!q->base)
- return;
- dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma);
-}
-
static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
{
struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
@@ -560,7 +549,8 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
char header[64];
- tegra241_vcmdq_free_smmu_cmdq(vcmdq);
+ /* Note that the lvcmdq queue memory space is managed by devres */
+
tegra241_vintf_deinit_lvcmdq(vintf, lidx);
dev_dbg(vintf->cmdqv->dev,
@@ -768,13 +758,13 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
vintf = kzalloc(sizeof(*vintf), GFP_KERNEL);
if (!vintf)
- goto out_fallback;
+ return -ENOMEM;
/* Init VINTF0 for in-kernel use */
ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf);
if (ret) {
dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret);
- goto free_vintf;
+ return ret;
}
/* Preallocate logical VCMDQs to VINTF0 */
@@ -783,24 +773,12 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx);
if (IS_ERR(vcmdq))
- goto free_lvcmdq;
+ return PTR_ERR(vcmdq);
}
/* Now, we are ready to run all the impl ops */
smmu->impl_ops = &tegra241_cmdqv_impl_ops;
return 0;
-
-free_lvcmdq:
- for (lidx--; lidx >= 0; lidx--)
- tegra241_vintf_free_lvcmdq(vintf, lidx);
- tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx);
-free_vintf:
- kfree(vintf);
-out_fallback:
- dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n");
- smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV;
- tegra241_cmdqv_remove(smmu);
- return 0;
}
#ifdef CONFIG_IOMMU_DEBUGFS
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index de205a34ffc6..8f439c265a23 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -79,8 +79,11 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
- if (pm_runtime_enabled(smmu->dev))
- pm_runtime_put_autosuspend(smmu->dev);
+ if (pm_runtime_enabled(smmu->dev)) {
+ pm_runtime_mark_last_busy(smmu->dev);
+ __pm_runtime_put_autosuspend(smmu->dev);
+
+ }
}
static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu)
@@ -1195,7 +1198,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
/* Looks ok, so add the device to the domain */
arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS,
smmu_domain->cfg.cbndx, fwspec);
- arm_smmu_rpm_use_autosuspend(smmu);
rpm_put:
arm_smmu_rpm_put(smmu);
return ret;
@@ -1218,7 +1220,6 @@ static int arm_smmu_attach_dev_type(struct device *dev,
return ret;
arm_smmu_master_install_s2crs(cfg, type, 0, fwspec);
- arm_smmu_rpm_use_autosuspend(smmu);
arm_smmu_rpm_put(smmu);
return 0;
}
@@ -1486,7 +1487,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
out_cfg_free:
kfree(cfg);
out_free:
- iommu_fwspec_free(dev);
return ERR_PTR(ret);
}
@@ -2246,6 +2246,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
if (dev->pm_domain) {
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
+ arm_smmu_rpm_use_autosuspend(smmu);
}
return 0;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 2a9fa0c8cc00..a775e4dbe06f 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -24,6 +24,7 @@
#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/msi.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
@@ -41,11 +42,6 @@ struct iommu_dma_msi_page {
phys_addr_t phys;
};
-enum iommu_dma_cookie_type {
- IOMMU_DMA_IOVA_COOKIE,
- IOMMU_DMA_MSI_COOKIE,
-};
-
enum iommu_dma_queue_type {
IOMMU_DMA_OPTS_PER_CPU_QUEUE,
IOMMU_DMA_OPTS_SINGLE_QUEUE,
@@ -58,35 +54,30 @@ struct iommu_dma_options {
};
struct iommu_dma_cookie {
- enum iommu_dma_cookie_type type;
+ struct iova_domain iovad;
+ struct list_head msi_page_list;
+ /* Flush queue */
union {
- /* Full allocator for IOMMU_DMA_IOVA_COOKIE */
- struct {
- struct iova_domain iovad;
- /* Flush queue */
- union {
- struct iova_fq *single_fq;
- struct iova_fq __percpu *percpu_fq;
- };
- /* Number of TLB flushes that have been started */
- atomic64_t fq_flush_start_cnt;
- /* Number of TLB flushes that have been finished */
- atomic64_t fq_flush_finish_cnt;
- /* Timer to regularily empty the flush queues */
- struct timer_list fq_timer;
- /* 1 when timer is active, 0 when not */
- atomic_t fq_timer_on;
- };
- /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
- dma_addr_t msi_iova;
+ struct iova_fq *single_fq;
+ struct iova_fq __percpu *percpu_fq;
};
- struct list_head msi_page_list;
-
+ /* Number of TLB flushes that have been started */
+ atomic64_t fq_flush_start_cnt;
+ /* Number of TLB flushes that have been finished */
+ atomic64_t fq_flush_finish_cnt;
+ /* Timer to regularly empty the flush queues */
+ struct timer_list fq_timer;
+ /* 1 when timer is active, 0 when not */
+ atomic_t fq_timer_on;
/* Domain for flush queue callback; NULL if flush queue not in use */
- struct iommu_domain *fq_domain;
+ struct iommu_domain *fq_domain;
/* Options for dma-iommu use */
- struct iommu_dma_options options;
- struct mutex mutex;
+ struct iommu_dma_options options;
+};
+
+struct iommu_dma_msi_cookie {
+ dma_addr_t msi_iova;
+ struct list_head msi_page_list;
};
static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
@@ -280,7 +271,7 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
if (!cookie->fq_domain)
return;
- del_timer_sync(&cookie->fq_timer);
+ timer_delete_sync(&cookie->fq_timer);
if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
iommu_dma_free_fq_single(cookie->single_fq);
else
@@ -365,39 +356,24 @@ int iommu_dma_init_fq(struct iommu_domain *domain)
return 0;
}
-static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
-{
- if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
- return cookie->iovad.granule;
- return PAGE_SIZE;
-}
-
-static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
-{
- struct iommu_dma_cookie *cookie;
-
- cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
- if (cookie) {
- INIT_LIST_HEAD(&cookie->msi_page_list);
- cookie->type = type;
- }
- return cookie;
-}
-
/**
* iommu_get_dma_cookie - Acquire DMA-API resources for a domain
* @domain: IOMMU domain to prepare for DMA-API usage
*/
int iommu_get_dma_cookie(struct iommu_domain *domain)
{
- if (domain->iova_cookie)
+ struct iommu_dma_cookie *cookie;
+
+ if (domain->cookie_type != IOMMU_COOKIE_NONE)
return -EEXIST;
- domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
- if (!domain->iova_cookie)
+ cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
+ if (!cookie)
return -ENOMEM;
- mutex_init(&domain->iova_cookie->mutex);
+ INIT_LIST_HEAD(&cookie->msi_page_list);
+ domain->cookie_type = IOMMU_COOKIE_DMA_IOVA;
+ domain->iova_cookie = cookie;
return 0;
}
@@ -415,48 +391,56 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
*/
int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
{
- struct iommu_dma_cookie *cookie;
+ struct iommu_dma_msi_cookie *cookie;
if (domain->type != IOMMU_DOMAIN_UNMANAGED)
return -EINVAL;
- if (domain->iova_cookie)
+ if (domain->cookie_type != IOMMU_COOKIE_NONE)
return -EEXIST;
- cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
+ cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
if (!cookie)
return -ENOMEM;
cookie->msi_iova = base;
- domain->iova_cookie = cookie;
+ INIT_LIST_HEAD(&cookie->msi_page_list);
+ domain->cookie_type = IOMMU_COOKIE_DMA_MSI;
+ domain->msi_cookie = cookie;
return 0;
}
EXPORT_SYMBOL(iommu_get_msi_cookie);
/**
* iommu_put_dma_cookie - Release a domain's DMA mapping resources
- * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
- * iommu_get_msi_cookie()
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
*/
void iommu_put_dma_cookie(struct iommu_domain *domain)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iommu_dma_msi_page *msi, *tmp;
- if (!cookie)
- return;
-
- if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
+ if (cookie->iovad.granule) {
iommu_dma_free_fq(cookie);
put_iova_domain(&cookie->iovad);
}
+ list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list)
+ kfree(msi);
+ kfree(cookie);
+}
- list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
- list_del(&msi->list);
+/**
+ * iommu_put_msi_cookie - Release a domain's MSI mapping resources
+ * @domain: IOMMU domain previously prepared by iommu_get_msi_cookie()
+ */
+void iommu_put_msi_cookie(struct iommu_domain *domain)
+{
+ struct iommu_dma_msi_cookie *cookie = domain->msi_cookie;
+ struct iommu_dma_msi_page *msi, *tmp;
+
+ list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list)
kfree(msi);
- }
kfree(cookie);
- domain->iova_cookie = NULL;
}
/**
@@ -676,7 +660,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
struct iova_domain *iovad;
int ret;
- if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
+ if (!cookie || domain->cookie_type != IOMMU_COOKIE_DMA_IOVA)
return -EINVAL;
iovad = &cookie->iovad;
@@ -698,23 +682,20 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
domain->geometry.aperture_start >> order);
/* start_pfn is always nonzero for an already-initialised domain */
- mutex_lock(&cookie->mutex);
if (iovad->start_pfn) {
if (1UL << order != iovad->granule ||
base_pfn != iovad->start_pfn) {
pr_warn("Incompatible range for DMA domain\n");
- ret = -EFAULT;
- goto done_unlock;
+ return -EFAULT;
}
- ret = 0;
- goto done_unlock;
+ return 0;
}
init_iova_domain(iovad, 1UL << order, base_pfn);
ret = iova_domain_init_rcaches(iovad);
if (ret)
- goto done_unlock;
+ return ret;
iommu_dma_init_options(&cookie->options, dev);
@@ -723,11 +704,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
(!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
domain->type = IOMMU_DOMAIN_DMA;
- ret = iova_reserve_iommu_regions(dev, domain);
-
-done_unlock:
- mutex_unlock(&cookie->mutex);
- return ret;
+ return iova_reserve_iommu_regions(dev, domain);
}
/**
@@ -766,9 +743,9 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
struct iova_domain *iovad = &cookie->iovad;
unsigned long shift, iova_len, iova;
- if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
- cookie->msi_iova += size;
- return cookie->msi_iova - size;
+ if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI) {
+ domain->msi_cookie->msi_iova += size;
+ return domain->msi_cookie->msi_iova - size;
}
shift = iova_shift(iovad);
@@ -805,16 +782,16 @@ done:
return (dma_addr_t)iova << shift;
}
-static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
- dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
+static void iommu_dma_free_iova(struct iommu_domain *domain, dma_addr_t iova,
+ size_t size, struct iommu_iotlb_gather *gather)
{
- struct iova_domain *iovad = &cookie->iovad;
+ struct iova_domain *iovad = &domain->iova_cookie->iovad;
/* The MSI case is only ever cleaning up its most recent allocation */
- if (cookie->type == IOMMU_DMA_MSI_COOKIE)
- cookie->msi_iova -= size;
+ if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI)
+ domain->msi_cookie->msi_iova -= size;
else if (gather && gather->queued)
- queue_iova(cookie, iova_pfn(iovad, iova),
+ queue_iova(domain->iova_cookie, iova_pfn(iovad, iova),
size >> iova_shift(iovad),
&gather->freelist);
else
@@ -842,7 +819,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
if (!iotlb_gather.queued)
iommu_iotlb_sync(domain, &iotlb_gather);
- iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
+ iommu_dma_free_iova(domain, dma_addr, size, &iotlb_gather);
}
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -870,7 +847,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
return DMA_MAPPING_ERROR;
if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
- iommu_dma_free_iova(cookie, iova, size, NULL);
+ iommu_dma_free_iova(domain, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@@ -1007,7 +984,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
out_free_sg:
sg_free_table(sgt);
out_free_iova:
- iommu_dma_free_iova(cookie, iova, size, NULL);
+ iommu_dma_free_iova(domain, iova, size, NULL);
out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@@ -1484,7 +1461,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
return __finalise_sg(dev, sg, nents, iova);
out_free_iova:
- iommu_dma_free_iova(cookie, iova, iova_len, NULL);
+ iommu_dma_free_iova(domain, iova, iova_len, NULL);
out_restore_sg:
__invalidate_sg(sg, nents);
out:
@@ -1762,17 +1739,47 @@ out_err:
dev->dma_iommu = false;
}
+static bool has_msi_cookie(const struct iommu_domain *domain)
+{
+ return domain && (domain->cookie_type == IOMMU_COOKIE_DMA_IOVA ||
+ domain->cookie_type == IOMMU_COOKIE_DMA_MSI);
+}
+
+static size_t cookie_msi_granule(const struct iommu_domain *domain)
+{
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ return domain->iova_cookie->iovad.granule;
+ case IOMMU_COOKIE_DMA_MSI:
+ return PAGE_SIZE;
+ default:
+ BUG();
+ }
+}
+
+static struct list_head *cookie_msi_pages(const struct iommu_domain *domain)
+{
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ return &domain->iova_cookie->msi_page_list;
+ case IOMMU_COOKIE_DMA_MSI:
+ return &domain->msi_cookie->msi_page_list;
+ default:
+ BUG();
+ }
+}
+
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
phys_addr_t msi_addr, struct iommu_domain *domain)
{
- struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct list_head *msi_page_list = cookie_msi_pages(domain);
struct iommu_dma_msi_page *msi_page;
dma_addr_t iova;
int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
- size_t size = cookie_msi_granule(cookie);
+ size_t size = cookie_msi_granule(domain);
msi_addr &= ~(phys_addr_t)(size - 1);
- list_for_each_entry(msi_page, &cookie->msi_page_list, list)
+ list_for_each_entry(msi_page, msi_page_list, list)
if (msi_page->phys == msi_addr)
return msi_page;
@@ -1790,70 +1797,35 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
INIT_LIST_HEAD(&msi_page->list);
msi_page->phys = msi_addr;
msi_page->iova = iova;
- list_add(&msi_page->list, &cookie->msi_page_list);
+ list_add(&msi_page->list, msi_page_list);
return msi_page;
out_free_iova:
- iommu_dma_free_iova(cookie, iova, size, NULL);
+ iommu_dma_free_iova(domain, iova, size, NULL);
out_free_page:
kfree(msi_page);
return NULL;
}
-/**
- * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
- * @desc: MSI descriptor, will store the MSI page
- * @msi_addr: MSI target address to be mapped
- *
- * Return: 0 on success or negative error code if the mapping failed.
- */
-int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
+int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr)
{
struct device *dev = msi_desc_to_dev(desc);
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct iommu_dma_msi_page *msi_page;
- static DEFINE_MUTEX(msi_prepare_lock); /* see below */
+ const struct iommu_dma_msi_page *msi_page;
- if (!domain || !domain->iova_cookie) {
- desc->iommu_cookie = NULL;
+ if (!has_msi_cookie(domain)) {
+ msi_desc_set_iommu_msi_iova(desc, 0, 0);
return 0;
}
- /*
- * In fact the whole prepare operation should already be serialised by
- * irq_domain_mutex further up the callchain, but that's pretty subtle
- * on its own, so consider this locking as failsafe documentation...
- */
- mutex_lock(&msi_prepare_lock);
+ iommu_group_mutex_assert(dev);
msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
- mutex_unlock(&msi_prepare_lock);
-
- msi_desc_set_iommu_cookie(desc, msi_page);
-
if (!msi_page)
return -ENOMEM;
- return 0;
-}
-/**
- * iommu_dma_compose_msi_msg() - Apply translation to an MSI message
- * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
- * @msg: MSI message containing target physical address
- */
-void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
-{
- struct device *dev = msi_desc_to_dev(desc);
- const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- const struct iommu_dma_msi_page *msi_page;
-
- msi_page = msi_desc_get_iommu_cookie(desc);
-
- if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
- return;
-
- msg->address_hi = upper_32_bits(msi_page->iova);
- msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
- msg->address_lo += lower_32_bits(msi_page->iova);
+ msi_desc_set_iommu_msi_iova(desc, msi_page->iova,
+ ilog2(cookie_msi_granule(domain)));
+ return 0;
}
static int iommu_dma_init(void)
diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h
index c12d63457c76..eca201c1f963 100644
--- a/drivers/iommu/dma-iommu.h
+++ b/drivers/iommu/dma-iommu.h
@@ -13,11 +13,15 @@ void iommu_setup_dma_ops(struct device *dev);
int iommu_get_dma_cookie(struct iommu_domain *domain);
void iommu_put_dma_cookie(struct iommu_domain *domain);
+void iommu_put_msi_cookie(struct iommu_domain *domain);
int iommu_dma_init_fq(struct iommu_domain *domain);
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
+int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr);
+
extern bool iommu_dma_forcedac;
#else /* CONFIG_IOMMU_DMA */
@@ -40,9 +44,19 @@ static inline void iommu_put_dma_cookie(struct iommu_domain *domain)
{
}
+static inline void iommu_put_msi_cookie(struct iommu_domain *domain)
+{
+}
+
static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{
}
+static inline int iommu_dma_sw_msi(struct iommu_domain *domain,
+ struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ return -ENODEV;
+}
+
#endif /* CONFIG_IOMMU_DMA */
#endif /* __DMA_IOMMU_H */
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 69e23e017d9e..317266aca6e2 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -832,7 +832,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (&data->domain->domain != &exynos_identity_domain) {
+ if (data->domain) {
dev_dbg(data->sysmmu, "saving state\n");
__sysmmu_disable(data);
}
@@ -850,7 +850,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (&data->domain->domain != &exynos_identity_domain) {
+ if (data->domain) {
dev_dbg(data->sysmmu, "restoring state\n");
__sysmmu_enable(data);
}
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 2a86aa5d54c6..761ab647f372 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -130,7 +130,7 @@ static int __init hyperv_prepare_irq_remapping(void)
x86_init.hyper.msi_ext_dest_id())
return -ENODEV;
- if (hv_root_partition) {
+ if (hv_root_partition()) {
name = "HYPERV-ROOT-IR";
ops = &hyperv_root_ir_domain_ops;
} else {
@@ -151,7 +151,7 @@ static int __init hyperv_prepare_irq_remapping(void)
return -ENOMEM;
}
- if (hv_root_partition)
+ if (hv_root_partition())
return 0; /* The rest is only relevant to guests */
/*
@@ -217,7 +217,7 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
status = hv_unmap_ioapic_interrupt(ioapic_id, &entry);
if (status != HV_STATUS_SUCCESS)
- pr_debug("%s: unexpected unmap status %lld\n", __func__, status);
+ hv_status_debug(status, "failed to unmap\n");
data->entry.ioapic_rte.as_uint64 = 0;
data->entry.source = 0; /* Invalid source */
@@ -228,7 +228,7 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
vector, &entry);
if (status != HV_STATUS_SUCCESS) {
- pr_err("%s: map hypercall failed, status %lld\n", __func__, status);
+ hv_status_err(status, "map failed\n");
return;
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index bf1f0c814348..b29da2d96d0b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -737,7 +737,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
return NULL;
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
- pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+ pteval = virt_to_phys(tmp_page) | DMA_PTE_READ |
+ DMA_PTE_WRITE;
if (domain->use_first_level)
pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
@@ -1172,32 +1173,59 @@ static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
return true;
}
-static void iommu_enable_pci_caps(struct device_domain_info *info)
+static void iommu_enable_pci_ats(struct device_domain_info *info)
{
struct pci_dev *pdev;
- if (!dev_is_pci(info->dev))
+ if (!info->ats_supported)
return;
pdev = to_pci_dev(info->dev);
- if (info->ats_supported && pci_ats_page_aligned(pdev) &&
- !pci_enable_ats(pdev, VTD_PAGE_SHIFT))
+ if (!pci_ats_page_aligned(pdev))
+ return;
+
+ if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT))
info->ats_enabled = 1;
}
-static void iommu_disable_pci_caps(struct device_domain_info *info)
+static void iommu_disable_pci_ats(struct device_domain_info *info)
+{
+ if (!info->ats_enabled)
+ return;
+
+ pci_disable_ats(to_pci_dev(info->dev));
+ info->ats_enabled = 0;
+}
+
+static void iommu_enable_pci_pri(struct device_domain_info *info)
{
struct pci_dev *pdev;
- if (!dev_is_pci(info->dev))
+ if (!info->ats_enabled || !info->pri_supported)
return;
pdev = to_pci_dev(info->dev);
+ /* PASID is required in PRG Response Message. */
+ if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
+ return;
- if (info->ats_enabled) {
- pci_disable_ats(pdev);
- info->ats_enabled = 0;
- }
+ if (pci_reset_pri(pdev))
+ return;
+
+ if (!pci_enable_pri(pdev, PRQ_DEPTH))
+ info->pri_enabled = 1;
+}
+
+static void iommu_disable_pci_pri(struct device_domain_info *info)
+{
+ if (!info->pri_enabled)
+ return;
+
+ if (WARN_ON(info->iopf_refcount))
+ iopf_queue_remove_device(info->iommu->iopf_queue, info->dev);
+
+ pci_disable_pri(to_pci_dev(info->dev));
+ info->pri_enabled = 0;
}
static void intel_flush_iotlb_all(struct iommu_domain *domain)
@@ -1556,12 +1584,19 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
u8 bus = info->bus, devfn = info->devfn;
+ int ret;
if (!dev_is_pci(dev))
return domain_context_mapping_one(domain, iommu, bus, devfn);
- return pci_for_each_dma_alias(to_pci_dev(dev),
- domain_context_mapping_cb, domain);
+ ret = pci_for_each_dma_alias(to_pci_dev(dev),
+ domain_context_mapping_cb, domain);
+ if (ret)
+ return ret;
+
+ iommu_enable_pci_ats(info);
+
+ return 0;
}
/* Return largest possible superpage level for a given mapping */
@@ -1748,7 +1783,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
spin_unlock(&iommu->lock);
- intel_context_flush_present(info, context, did, true);
+ intel_context_flush_no_pasid(info, context, did);
}
int __domain_setup_first_level(struct intel_iommu *iommu,
@@ -1843,8 +1878,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (ret)
goto out_block_translation;
- iommu_enable_pci_caps(info);
-
ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
if (ret)
goto out_block_translation;
@@ -2871,16 +2904,19 @@ void intel_iommu_shutdown(void)
if (no_iommu || dmar_disabled)
return;
- down_write(&dmar_global_lock);
+ /*
+ * All other CPUs were brought down, hotplug interrupts were disabled,
+ * no lock and RCU checking needed anymore
+ */
+ list_for_each_entry(drhd, &dmar_drhd_units, list) {
+ iommu = drhd->iommu;
- /* Disable PMRs explicitly here. */
- for_each_iommu(iommu, drhd)
+ /* Disable PMRs explicitly here. */
iommu_disable_protect_mem_regions(iommu);
- /* Make sure the IOMMUs are switched off */
- intel_disable_iommus();
-
- up_write(&dmar_global_lock);
+ /* Make sure the IOMMUs are switched off */
+ iommu_disable_translation(iommu);
+ }
}
static struct intel_iommu *dev_to_intel_iommu(struct device *dev)
@@ -3013,6 +3049,7 @@ static int __init probe_acpi_namespace_devices(void)
if (dev->bus != &acpi_bus_type)
continue;
+ up_read(&dmar_global_lock);
adev = to_acpi_device(dev);
mutex_lock(&adev->physical_node_lock);
list_for_each_entry(pn,
@@ -3022,6 +3059,7 @@ static int __init probe_acpi_namespace_devices(void)
break;
}
mutex_unlock(&adev->physical_node_lock);
+ down_read(&dmar_global_lock);
if (ret)
return ret;
@@ -3205,6 +3243,7 @@ static void domain_context_clear(struct device_domain_info *info)
pci_for_each_dma_alias(to_pci_dev(info->dev),
&domain_context_clear_one_cb, info);
+ iommu_disable_pci_ats(info);
}
/*
@@ -3221,7 +3260,6 @@ void device_block_translation(struct device *dev)
if (info->domain)
cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
- iommu_disable_pci_caps(info);
if (!dev_is_real_dma_subdevice(dev)) {
if (sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, dev,
@@ -3345,7 +3383,8 @@ intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
bool first_stage;
if (flags &
- (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+ (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+ IOMMU_HWPT_ALLOC_PASID)))
return ERR_PTR(-EOPNOTSUPP);
if (nested_parent && !nested_supported(iommu))
return ERR_PTR(-EOPNOTSUPP);
@@ -3756,6 +3795,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
!pci_enable_pasid(pdev, info->pasid_supported & ~1))
info->pasid_enabled = 1;
+ if (sm_supported(iommu))
+ iommu_enable_pci_ats(info);
+ iommu_enable_pci_pri(info);
+
return &iommu->iommu;
free_table:
intel_pasid_free_table(dev);
@@ -3772,6 +3815,9 @@ static void intel_iommu_release_device(struct device *dev)
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
+ iommu_disable_pci_pri(info);
+ iommu_disable_pci_ats(info);
+
if (info->pasid_enabled) {
pci_disable_pasid(to_pci_dev(dev));
info->pasid_enabled = 0;
@@ -3789,7 +3835,6 @@ static void intel_iommu_release_device(struct device *dev)
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
kfree(info);
- set_dma_ops(dev, NULL);
}
static void intel_iommu_get_resv_regions(struct device *device,
@@ -3858,151 +3903,41 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev)
return generic_device_group(dev);
}
-static int intel_iommu_enable_sva(struct device *dev)
+int intel_iommu_enable_iopf(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu;
-
- if (!info || dmar_disabled)
- return -EINVAL;
-
- iommu = info->iommu;
- if (!iommu)
- return -EINVAL;
-
- if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
- return -ENODEV;
-
- if (!info->pasid_enabled || !info->ats_enabled)
- return -EINVAL;
-
- /*
- * Devices having device-specific I/O fault handling should not
- * support PCI/PRI. The IOMMU side has no means to check the
- * capability of device-specific IOPF. Therefore, IOMMU can only
- * default that if the device driver enables SVA on a non-PRI
- * device, it will handle IOPF in its own way.
- */
- if (!info->pri_supported)
- return 0;
-
- /* Devices supporting PRI should have it enabled. */
- if (!info->pri_enabled)
- return -EINVAL;
-
- return 0;
-}
-
-static int context_flip_pri(struct device_domain_info *info, bool enable)
-{
struct intel_iommu *iommu = info->iommu;
- u8 bus = info->bus, devfn = info->devfn;
- struct context_entry *context;
- u16 did;
-
- spin_lock(&iommu->lock);
- if (context_copied(iommu, bus, devfn)) {
- spin_unlock(&iommu->lock);
- return -EINVAL;
- }
-
- context = iommu_context_addr(iommu, bus, devfn, false);
- if (!context || !context_present(context)) {
- spin_unlock(&iommu->lock);
- return -ENODEV;
- }
- did = context_domain_id(context);
-
- if (enable)
- context_set_sm_pre(context);
- else
- context_clear_sm_pre(context);
-
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(context, sizeof(*context));
- intel_context_flush_present(info, context, did, true);
- spin_unlock(&iommu->lock);
-
- return 0;
-}
-
-static int intel_iommu_enable_iopf(struct device *dev)
-{
- struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu;
int ret;
- if (!pdev || !info || !info->ats_enabled || !info->pri_supported)
+ if (!info->pri_enabled)
return -ENODEV;
- if (info->pri_enabled)
- return -EBUSY;
-
- iommu = info->iommu;
- if (!iommu)
- return -EINVAL;
-
- /* PASID is required in PRG Response Message. */
- if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
- return -EINVAL;
-
- ret = pci_reset_pri(pdev);
- if (ret)
- return ret;
+ if (info->iopf_refcount) {
+ info->iopf_refcount++;
+ return 0;
+ }
ret = iopf_queue_add_device(iommu->iopf_queue, dev);
if (ret)
return ret;
- ret = context_flip_pri(info, true);
- if (ret)
- goto err_remove_device;
-
- ret = pci_enable_pri(pdev, PRQ_DEPTH);
- if (ret)
- goto err_clear_pri;
-
- info->pri_enabled = 1;
+ info->iopf_refcount = 1;
return 0;
-err_clear_pri:
- context_flip_pri(info, false);
-err_remove_device:
- iopf_queue_remove_device(iommu->iopf_queue, dev);
-
- return ret;
}
-static int intel_iommu_disable_iopf(struct device *dev)
+void intel_iommu_disable_iopf(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
- if (!info->pri_enabled)
- return -EINVAL;
+ if (WARN_ON(!info->pri_enabled || !info->iopf_refcount))
+ return;
- /* Disable new PRI reception: */
- context_flip_pri(info, false);
+ if (--info->iopf_refcount)
+ return;
- /*
- * Remove device from fault queue and acknowledge all outstanding
- * PRQs to the device:
- */
iopf_queue_remove_device(iommu->iopf_queue, dev);
-
- /*
- * PCIe spec states that by clearing PRI enable bit, the Page
- * Request Interface will not issue new page requests, but has
- * outstanding page requests that have been transmitted or are
- * queued for transmission. This is supposed to be called after
- * the device driver has stopped DMA, all PASIDs have been
- * unbound and the outstanding PRQs have been drained.
- */
- pci_disable_pri(to_pci_dev(dev));
- info->pri_enabled = 0;
-
- return 0;
}
static int
@@ -4013,7 +3948,7 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
return intel_iommu_enable_iopf(dev);
case IOMMU_DEV_FEAT_SVA:
- return intel_iommu_enable_sva(dev);
+ return 0;
default:
return -ENODEV;
@@ -4025,7 +3960,8 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
- return intel_iommu_disable_iopf(dev);
+ intel_iommu_disable_iopf(dev);
+ return 0;
case IOMMU_DEV_FEAT_SVA:
return 0;
@@ -4410,13 +4346,10 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device
if (dev_is_real_dma_subdevice(dev))
return 0;
- if (sm_supported(iommu)) {
+ if (sm_supported(iommu))
ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
- if (!ret)
- iommu_enable_pci_caps(info);
- } else {
+ else
ret = device_setup_pass_through(dev);
- }
return ret;
}
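
A minimal caller sketch (not part of the patch; the function names are hypothetical) of the reworked fault interface above: PRI is now brought up once in intel_iommu_probe_device(), and users of I/O page faults only pair the refcounted intel_iommu_enable_iopf()/intel_iommu_disable_iopf() calls around their fault-handling window.

static int my_driver_start_iopf(struct device *dev)
{
	int ret;

	/* First caller adds dev to the IOPF queue; later callers only bump iopf_refcount */
	ret = intel_iommu_enable_iopf(dev);
	if (ret)
		return ret;	/* -ENODEV when PRI was not enabled at probe time */

	/* ... attach a fault-capable domain and start DMA ... */
	return 0;
}

static void my_driver_stop_iopf(struct device *dev)
{
	/* ... stop DMA and detach the domain ... */

	/* Last caller removes dev from the IOPF queue again */
	intel_iommu_disable_iopf(dev);
}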
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 6ea7bbe26b19..c4916886da5a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -774,6 +774,7 @@ struct device_domain_info {
u8 ats_enabled:1;
u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */
u8 ats_qdep;
+ unsigned int iopf_refcount;
struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
@@ -953,25 +954,6 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}
-/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
- are never going to work. */
-static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn)
-{
- return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
-}
-static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn)
-{
- return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1;
-}
-static inline unsigned long page_to_dma_pfn(struct page *pg)
-{
- return mm_to_dma_pfn_start(page_to_pfn(pg));
-}
-static inline unsigned long virt_to_dma_pfn(void *p)
-{
- return page_to_dma_pfn(virt_to_page(p));
-}
-
static inline void context_set_present(struct context_entry *context)
{
context->lo |= 1;
@@ -1304,9 +1286,8 @@ void cache_tag_flush_all(struct dmar_domain *domain);
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
unsigned long end);
-void intel_context_flush_present(struct device_domain_info *info,
- struct context_entry *context,
- u16 did, bool affect_domains);
+void intel_context_flush_no_pasid(struct device_domain_info *info,
+ struct context_entry *context, u16 did);
int intel_iommu_enable_prq(struct intel_iommu *iommu);
int intel_iommu_finish_prq(struct intel_iommu *iommu);
@@ -1314,6 +1295,9 @@ void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg);
void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid);
+int intel_iommu_enable_iopf(struct device *dev);
+void intel_iommu_disable_iopf(struct device *dev);
+
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index ad795c772f21..3bc2a03cceca 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -25,11 +25,6 @@
#include "../irq_remapping.h"
#include "../iommu-pages.h"
-enum irq_mode {
- IRQ_REMAPPING,
- IRQ_POSTING,
-};
-
struct ioapic_scope {
struct intel_iommu *iommu;
unsigned int id;
@@ -49,8 +44,8 @@ struct irq_2_iommu {
u16 irte_index;
u16 sub_handle;
u8 irte_mask;
- enum irq_mode mode;
bool posted_msi;
+ bool posted_vcpu;
};
struct intel_ir_data {
@@ -138,7 +133,6 @@ static int alloc_irte(struct intel_iommu *iommu,
irq_iommu->irte_index = index;
irq_iommu->sub_handle = 0;
irq_iommu->irte_mask = mask;
- irq_iommu->mode = IRQ_REMAPPING;
}
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
@@ -193,8 +187,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
rc = qi_flush_iec(iommu, index, 0);
- /* Update iommu mode according to the IRTE mode */
- irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING;
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
@@ -1169,7 +1161,26 @@ static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd)
static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {}
#endif
-static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
+static void __intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host)
+{
+ struct intel_ir_data *ir_data = irqd->chip_data;
+
+ /*
+ * Don't modify IRTEs for IRQs that are being posted to vCPUs if the
+ * host CPU affinity changes.
+ */
+ if (ir_data->irq_2_iommu.posted_vcpu && !force_host)
+ return;
+
+ ir_data->irq_2_iommu.posted_vcpu = false;
+
+ if (ir_data->irq_2_iommu.posted_msi)
+ intel_ir_reconfigure_irte_posted(irqd);
+ else
+ modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
+}
+
+static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host)
{
struct intel_ir_data *ir_data = irqd->chip_data;
struct irte *irte = &ir_data->irte_entry;
@@ -1182,10 +1193,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
irte->vector = cfg->vector;
irte->dest_id = IRTE_DEST(cfg->dest_apicid);
- if (ir_data->irq_2_iommu.posted_msi)
- intel_ir_reconfigure_irte_posted(irqd);
- else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
- modify_irte(&ir_data->irq_2_iommu, irte);
+ __intel_ir_reconfigure_irte(irqd, force_host);
}
/*
@@ -1240,7 +1248,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
/* stop posting interrupts, back to the default mode */
if (!vcpu_pi_info) {
- modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
+ __intel_ir_reconfigure_irte(data, true);
} else {
struct irte irte_pi;
@@ -1263,6 +1271,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) &
~(-1UL << PDA_HIGH_BIT);
+ ir_data->irq_2_iommu.posted_vcpu = true;
modify_irte(&ir_data->irq_2_iommu, &irte_pi);
}
@@ -1278,43 +1287,44 @@ static struct irq_chip intel_ir_chip = {
};
/*
- * With posted MSIs, all vectors are multiplexed into a single notification
- * vector. Devices MSIs are then dispatched in a demux loop where
- * EOIs can be coalesced as well.
+ * With posted MSIs, the MSI vectors are multiplexed into a single notification
+ * vector, and only the notification vector is sent to the APIC IRR. Device
+ * MSIs are then dispatched in a demux loop that harvests the MSIs from the
+ * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to
+ * the APIC IRR, and thus do not need an EOI. The notification handler instead
+ * performs a single EOI after processing the PIR.
*
- * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack()
- * function. Instead EOI is performed by the posted interrupt notification
- * handler.
+ * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be
+ * honored; only the APIC EOI is omitted.
*
* For the example below, 3 MSIs are coalesced into one CPU notification. Only
- * one apic_eoi() is needed.
+ * one apic_eoi() is needed, but each MSI needs to process pending changes to
+ * its CPU affinity.
*
* __sysvec_posted_msi_notification()
* irq_enter();
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* apic_eoi()
* irq_exit()
+ *
*/
-
-static void dummy_ack(struct irq_data *d) { }
-
static struct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
- .irq_ack = dummy_ack,
+ .irq_ack = irq_move_irq,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
@@ -1489,6 +1499,9 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain,
struct intel_ir_data *data = irq_data->chip_data;
struct irte entry;
+ WARN_ON_ONCE(data->irq_2_iommu.posted_vcpu);
+ data->irq_2_iommu.posted_vcpu = false;
+
memset(&entry, 0, sizeof(entry));
modify_irte(&data->irq_2_iommu, &entry);
}
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index aba92c00b427..6ac5c534bef4 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -198,7 +198,7 @@ intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
struct dmar_domain *domain;
int ret;
- if (!nested_supported(iommu) || flags)
+ if (!nested_supported(iommu) || flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
/* Must be nested domain */
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index fb59a7d35958..7ee18bb48bd4 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -932,7 +932,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
spin_unlock(&iommu->lock);
- intel_context_flush_present(info, context, did, false);
+ intel_context_flush_no_pasid(info, context, did);
}
static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
@@ -992,6 +992,8 @@ static int context_entry_set_pasid_table(struct context_entry *context,
context_set_sm_dte(context);
if (info->pasid_supported)
context_set_pasid(context);
+ if (info->pri_supported)
+ context_set_sm_pre(context);
context_set_fault_enable(context);
context_set_present(context);
@@ -1117,17 +1119,15 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info)
/*
* Cache invalidations after change in a context table entry that was present
- * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If
- * IOMMU is in scalable mode and all PASID table entries of the device were
- * non-present, set flush_domains to false. Otherwise, true.
+ * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations).
+ * This helper can only be used when the IOMMU is working in legacy mode, or
+ * in scalable mode when all PASID table entries of the device are
+ * non-present.
*/
-void intel_context_flush_present(struct device_domain_info *info,
- struct context_entry *context,
- u16 did, bool flush_domains)
+void intel_context_flush_no_pasid(struct device_domain_info *info,
+ struct context_entry *context, u16 did)
{
struct intel_iommu *iommu = info->iommu;
- struct pasid_entry *pte;
- int i;
/*
* Device-selective context-cache invalidation. The Domain-ID field
@@ -1150,30 +1150,5 @@ void intel_context_flush_present(struct device_domain_info *info,
return;
}
- /*
- * For scalable mode:
- * - Domain-selective PASID-cache invalidation to affected domains
- * - Domain-selective IOTLB invalidation to affected domains
- * - Global Device-TLB invalidation to affected functions
- */
- if (flush_domains) {
- /*
- * If the IOMMU is running in scalable mode and there might
- * be potential PASID translations, the caller should hold
- * the lock to ensure that context changes and cache flushes
- * are atomic.
- */
- assert_spin_locked(&iommu->lock);
- for (i = 0; i < info->pasid_table->max_pasid; i++) {
- pte = intel_pasid_get_entry(info->dev, i);
- if (!pte || !pasid_pte_is_present(pte))
- continue;
-
- did = pasid_get_domain_id(pte);
- qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0);
- iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
- }
- }
-
__context_flush_dev_iotlb(info);
}
diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c
index 064194399b38..5b6a64d96850 100644
--- a/drivers/iommu/intel/prq.c
+++ b/drivers/iommu/intel/prq.c
@@ -67,7 +67,7 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid)
u16 sid, did;
info = dev_iommu_priv_get(dev);
- if (!info->pri_enabled)
+ if (!info->iopf_refcount)
return;
iommu = info->iommu;
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index f5569347591f..ba93123cb4eb 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -110,6 +110,41 @@ static const struct mmu_notifier_ops intel_mmuops = {
.free_notifier = intel_mm_free_notifier,
};
+static int intel_iommu_sva_supported(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu;
+
+ if (!info || dmar_disabled)
+ return -EINVAL;
+
+ iommu = info->iommu;
+ if (!iommu)
+ return -EINVAL;
+
+ if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
+ return -ENODEV;
+
+ if (!info->pasid_enabled || !info->ats_enabled)
+ return -EINVAL;
+
+ /*
+ * Devices having device-specific I/O fault handling should not
+ * support PCI/PRI. The IOMMU side has no means to check the
+ * capability of device-specific IOPF. Therefore, the IOMMU can only
+ * assume that if the device driver enables SVA on a non-PRI
+ * device, it will handle IOPF in its own way.
+ */
+ if (!info->pri_supported)
+ return 0;
+
+ /* Devices supporting PRI should have it enabled. */
+ if (!info->pri_enabled)
+ return -EINVAL;
+
+ return 0;
+}
+
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
@@ -121,6 +156,10 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
unsigned long sflags;
int ret = 0;
+ ret = intel_iommu_sva_supported(dev);
+ if (ret)
+ return ret;
+
dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
if (IS_ERR(dev_pasid))
return PTR_ERR(dev_pasid);
@@ -161,6 +200,10 @@ struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
struct dmar_domain *domain;
int ret;
+ ret = intel_iommu_sva_supported(dev);
+ if (ret)
+ return ERR_PTR(ret);
+
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c
index c004640640ee..06aca9ab52f9 100644
--- a/drivers/iommu/io-pgtable-dart.c
+++ b/drivers/iommu/io-pgtable-dart.c
@@ -135,7 +135,6 @@ static int dart_init_pte(struct dart_io_pgtable *data,
pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_START, 0);
pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_END, 0xfff);
- pte |= APPLE_DART1_PTE_PROT_SP_DIS;
pte |= APPLE_DART_PTE_VALID;
for (i = 0; i < num_entries; i++)
@@ -211,6 +210,7 @@ static dart_iopte dart_prot_to_pte(struct dart_io_pgtable *data,
dart_iopte pte = 0;
if (data->iop.fmt == APPLE_DART) {
+ pte |= APPLE_DART1_PTE_PROT_SP_DIS;
if (!(prot & IOMMU_WRITE))
pte |= APPLE_DART1_PTE_PROT_NO_WRITE;
if (!(prot & IOMMU_READ))
diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
index de5b54eaa8bf..e236b932e766 100644
--- a/drivers/iommu/iommu-priv.h
+++ b/drivers/iommu/iommu-priv.h
@@ -5,6 +5,7 @@
#define __LINUX_IOMMU_PRIV_H
#include <linux/iommu.h>
+#include <linux/msi.h>
static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
{
@@ -17,6 +18,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
return dev->iommu->iommu_dev->ops;
}
+void dev_iommu_free(struct device *dev);
+
const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode);
static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec)
@@ -24,8 +27,7 @@ static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwsp
return iommu_ops_from_fwnode(fwspec ? fwspec->iommu_fwnode : NULL);
}
-int iommu_group_replace_domain(struct iommu_group *group,
- struct iommu_domain *new_domain);
+void iommu_fwspec_free(struct device *dev);
int iommu_device_register_bus(struct iommu_device *iommu,
const struct iommu_ops *ops,
@@ -46,4 +48,19 @@ void iommu_detach_group_handle(struct iommu_domain *domain,
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle);
+
+#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) && IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr);
+#else /* !CONFIG_IOMMUFD_DRIVER_CORE || !CONFIG_IRQ_MSI_IOMMU */
+static inline int iommufd_sw_msi(struct iommu_domain *domain,
+ struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_IOMMUFD_DRIVER_CORE && CONFIG_IRQ_MSI_IOMMU */
+
+int iommu_replace_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle);
#endif /* __LINUX_IOMMU_PRIV_H */
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 503c5d23c1ea..ab18bc494eef 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -310,6 +310,7 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
}
domain->type = IOMMU_DOMAIN_SVA;
+ domain->cookie_type = IOMMU_COOKIE_SVA;
mmgrab(mm);
domain->mm = mm;
domain->owner = ops;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 60aed01e54f2..4f91a740c15f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -18,6 +18,7 @@
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
+#include <linux/iommufd.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
@@ -45,6 +46,9 @@ static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
static u32 iommu_cmd_line __read_mostly;
+/* Tags used with xa_tag_pointer() in group->pasid_array */
+enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 };
+
struct iommu_group {
struct kobject kobj;
struct kobject *devices_kobj;
@@ -352,7 +356,7 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
return param;
}
-static void dev_iommu_free(struct device *dev)
+void dev_iommu_free(struct device *dev)
{
struct dev_iommu *param = dev->iommu;
@@ -404,14 +408,40 @@ EXPORT_SYMBOL_GPL(dev_iommu_priv_set);
* Init the dev->iommu and dev->iommu_group in the struct device and get the
* driver probed
*/
-static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
+static int iommu_init_device(struct device *dev)
{
+ const struct iommu_ops *ops;
struct iommu_device *iommu_dev;
struct iommu_group *group;
int ret;
if (!dev_iommu_get(dev))
return -ENOMEM;
+ /*
+ * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing
+ * is buried in the bus dma_configure path. Properly unpicking that is
+ * still a big job, so for now just invoke the whole thing. The device
+ * already having a driver bound means dma_configure has already run and
+ * either found no IOMMU to wait for, or we're in its replay call right
+ * now, so either way there's no point calling it again.
+ */
+ if (!dev->driver && dev->bus->dma_configure) {
+ mutex_unlock(&iommu_probe_device_lock);
+ dev->bus->dma_configure(dev);
+ mutex_lock(&iommu_probe_device_lock);
+ }
+ /*
+ * At this point, relevant devices either now have a fwspec which will
+ * match ops registered with a non-NULL fwnode, or we can reasonably
+ * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
+ * be present, and that any of their registered instances has suitable
+ * ops for probing, and thus cheekily co-opt the same mechanism.
+ */
+ ops = iommu_fwspec_ops(dev->iommu->fwspec);
+ if (!ops) {
+ ret = -ENODEV;
+ goto err_free;
+ }
if (!try_module_get(ops->owner)) {
ret = -EINVAL;
@@ -508,29 +538,27 @@ static void iommu_deinit_device(struct device *dev)
dev->iommu_group = NULL;
module_put(ops->owner);
dev_iommu_free(dev);
+#ifdef CONFIG_IOMMU_DMA
+ dev->dma_iommu = false;
+#endif
+}
+
+static struct iommu_domain *pasid_array_entry_to_domain(void *entry)
+{
+ if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN)
+ return xa_untag_pointer(entry);
+ return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain;
}
DEFINE_MUTEX(iommu_probe_device_lock);
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
- const struct iommu_ops *ops;
struct iommu_group *group;
struct group_device *gdev;
int ret;
/*
- * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU
- * instances with non-NULL fwnodes, and client devices should have been
- * identified with a fwspec by this point. Otherwise, we can currently
- * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
- * be present, and that any of their registered instances has suitable
- * ops for probing, and thus cheekily co-opt the same mechanism.
- */
- ops = iommu_fwspec_ops(dev_iommu_fwspec_get(dev));
- if (!ops)
- return -ENODEV;
- /*
* Serialise to avoid races between IOMMU drivers registering in
* parallel and/or the "replay" calls from ACPI/OF code via client
* driver probe. Once the latter have been cleaned up we should
@@ -543,9 +571,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
if (dev->iommu_group)
return 0;
- ret = iommu_init_device(dev, ops);
+ ret = iommu_init_device(dev);
if (ret)
return ret;
+ /*
+ * And if we do now see any replay calls, they would indicate someone
+ * misusing the dma_configure path outside bus code.
+ */
+ if (dev->driver)
+ dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n");
group = dev->iommu_group;
gdev = iommu_group_alloc_device(group, dev);
@@ -1950,8 +1984,10 @@ void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler,
void *token)
{
- BUG_ON(!domain);
+ if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE))
+ return;
+ domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER;
domain->handler = handler;
domain->handler_token = token;
}
@@ -2021,9 +2057,19 @@ EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags);
void iommu_domain_free(struct iommu_domain *domain)
{
- if (domain->type == IOMMU_DOMAIN_SVA)
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ iommu_put_dma_cookie(domain);
+ break;
+ case IOMMU_COOKIE_DMA_MSI:
+ iommu_put_msi_cookie(domain);
+ break;
+ case IOMMU_COOKIE_SVA:
mmdrop(domain->mm);
- iommu_put_dma_cookie(domain);
+ break;
+ default:
+ break;
+ }
if (domain->ops->free)
domain->ops->free(domain);
}
@@ -2147,6 +2193,17 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev)
return dev->iommu_group->default_domain;
}
+static void *iommu_make_pasid_array_entry(struct iommu_domain *domain,
+ struct iommu_attach_handle *handle)
+{
+ if (handle) {
+ handle->domain = domain;
+ return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE);
+ }
+
+ return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
+}
+
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group)
{
@@ -2187,32 +2244,6 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_attach_group);
-/**
- * iommu_group_replace_domain - replace the domain that a group is attached to
- * @group: IOMMU group that will be attached to the new domain
- * @new_domain: new IOMMU domain to replace with
- *
- * This API allows the group to switch domains without being forced to go to
- * the blocking domain in-between.
- *
- * If the currently attached domain is a core domain (e.g. a default_domain),
- * it will act just like the iommu_attach_group().
- */
-int iommu_group_replace_domain(struct iommu_group *group,
- struct iommu_domain *new_domain)
-{
- int ret;
-
- if (!new_domain)
- return -EINVAL;
-
- mutex_lock(&group->mutex);
- ret = __iommu_group_set_domain(group, new_domain);
- mutex_unlock(&group->mutex);
- return ret;
-}
-EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, "IOMMUFD_INTERNAL");
-
static int __iommu_device_set_domain(struct iommu_group *group,
struct device *dev,
struct iommu_domain *new_domain,
@@ -2689,7 +2720,8 @@ int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
* if upper layers showed interest and installed a fault handler,
* invoke it.
*/
- if (domain->handler)
+ if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER &&
+ domain->handler)
ret = domain->handler(domain, dev, iova, flags,
domain->handler_token);
@@ -2849,7 +2881,6 @@ void iommu_fwspec_free(struct device *dev)
dev_iommu_fwspec_set(dev, NULL);
}
}
-EXPORT_SYMBOL_GPL(iommu_fwspec_free);
int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids)
{
@@ -3097,6 +3128,11 @@ int iommu_device_use_default_domain(struct device *dev)
return 0;
mutex_lock(&group->mutex);
+ /* We may race against bus_iommu_probe() finalising groups here */
+ if (!group->default_domain) {
+ ret = -EPROBE_DEFER;
+ goto unlock_out;
+ }
if (group->owner_cnt) {
if (group->domain != group->default_domain || group->owner ||
!xa_empty(&group->pasid_array)) {
@@ -3323,14 +3359,15 @@ static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
}
static int __iommu_set_group_pasid(struct iommu_domain *domain,
- struct iommu_group *group, ioasid_t pasid)
+ struct iommu_group *group, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct group_device *device, *last_gdev;
int ret;
for_each_group_device(group, device) {
ret = domain->ops->set_dev_pasid(domain, device->dev,
- pasid, NULL);
+ pasid, old);
if (ret)
goto err_revert;
}
@@ -3342,7 +3379,15 @@ err_revert:
for_each_group_device(group, device) {
if (device == last_gdev)
break;
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ /*
+ * If there is no old domain, undo the devices/pasid that were
+ * successfully attached. Otherwise, roll them back to the old
+ * domain. It is a driver bug to fail attaching with a
+ * previously good domain.
+ */
+ if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev,
+ pasid, domain)))
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
}
return ret;
}
@@ -3364,6 +3409,9 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
* @pasid: the pasid of the device.
* @handle: the attach handle.
*
+ * If the caller intends to pass a valid handle, it should always provide a
+ * new one to avoid racing with paths that hold a lockless reference to the
+ * handle.
+ *
* Return: 0 on success, or an error.
*/
int iommu_attach_device_pasid(struct iommu_domain *domain,
@@ -3374,6 +3422,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
const struct iommu_ops *ops;
+ void *entry;
int ret;
if (!group)
@@ -3397,22 +3446,128 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
}
}
- if (handle)
- handle->domain = domain;
+ entry = iommu_make_pasid_array_entry(domain, handle);
- ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL);
+ /*
+ * An already-present entry is a failure case, so use xa_insert()
+ * instead of xa_reserve().
+ */
+ ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL);
if (ret)
goto out_unlock;
- ret = __iommu_set_group_pasid(domain, group, pasid);
- if (ret)
- xa_erase(&group->pasid_array, pasid);
+ ret = __iommu_set_group_pasid(domain, group, pasid, NULL);
+ if (ret) {
+ xa_release(&group->pasid_array, pasid);
+ goto out_unlock;
+ }
+
+ /*
+ * The xa_insert() above reserved the memory, and the group->mutex is
+ * held, this cannot fail. The new domain cannot be visible until the
+ * operation succeeds as we cannot tolerate PRIs becoming concurrently
+ * queued and then failing attach.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ pasid, entry, GFP_KERNEL)));
+
out_unlock:
mutex_unlock(&group->mutex);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
+/**
+ * iommu_replace_device_pasid - Replace the domain that a specific pasid
+ * of the device is attached to
+ * @domain: the new iommu domain
+ * @dev: the attached device.
+ * @pasid: the pasid of the device.
+ * @handle: the attach handle.
+ *
+ * This API allows the pasid to switch domains. The @pasid must already be
+ * attached; otherwise, this fails. If the replacement fails, the pasid keeps
+ * its old configuration.
+ *
+ * The caller should always provide a new handle to avoid racing with paths
+ * that hold a lockless reference to the handle.
+ *
+ * Return: 0 on success, or an error.
+ */
+int iommu_replace_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle)
+{
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
+ struct iommu_attach_handle *entry;
+ struct iommu_domain *curr_domain;
+ void *curr;
+ int ret;
+
+ if (!group)
+ return -ENODEV;
+
+ if (!domain->ops->set_dev_pasid)
+ return -EOPNOTSUPP;
+
+ if (dev_iommu_ops(dev) != domain->owner ||
+ pasid == IOMMU_NO_PASID || !handle)
+ return -EINVAL;
+
+ mutex_lock(&group->mutex);
+ entry = iommu_make_pasid_array_entry(domain, handle);
+ curr = xa_cmpxchg(&group->pasid_array, pasid, NULL,
+ XA_ZERO_ENTRY, GFP_KERNEL);
+ if (xa_is_err(curr)) {
+ ret = xa_err(curr);
+ goto out_unlock;
+ }
+
+ /*
+ * No domain (with or without handle) attached, hence not
+ * a replace case.
+ */
+ if (!curr) {
+ xa_release(&group->pasid_array, pasid);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * Reusing a handle is problematic as there are paths that refer to
+ * the handle without holding a lock. To avoid races, reject callers
+ * that attempt it.
+ */
+ if (curr == entry) {
+ WARN_ON(1);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ curr_domain = pasid_array_entry_to_domain(curr);
+ ret = 0;
+
+ if (curr_domain != domain) {
+ ret = __iommu_set_group_pasid(domain, group,
+ pasid, curr_domain);
+ if (ret)
+ goto out_unlock;
+ }
+
+ /*
+ * The above xa_cmpxchg() reserved the memory, and the
+ * group->mutex is held, this cannot fail.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ pasid, entry, GFP_KERNEL)));
+
+out_unlock:
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL");
+
/*
* iommu_detach_device_pasid() - Detach the domain from pasid of device
* @domain: the iommu domain.
@@ -3480,13 +3635,17 @@ struct iommu_attach_handle *
iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type)
{
struct iommu_attach_handle *handle;
+ void *entry;
xa_lock(&group->pasid_array);
- handle = xa_load(&group->pasid_array, pasid);
- if (!handle)
+ entry = xa_load(&group->pasid_array, pasid);
+ if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) {
handle = ERR_PTR(-ENOENT);
- else if (type && handle->domain->type != type)
- handle = ERR_PTR(-EBUSY);
+ } else {
+ handle = xa_untag_pointer(entry);
+ if (type && handle->domain->type != type)
+ handle = ERR_PTR(-EBUSY);
+ }
xa_unlock(&group->pasid_array);
return handle;
@@ -3504,30 +3663,43 @@ EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL");
* This is a variant of iommu_attach_group(). It allows the caller to provide
* an attach handle and use it when the domain is attached. This is currently
* used by IOMMUFD to deliver the I/O page faults.
+ *
+ * The caller should always provide a new handle to avoid racing with paths
+ * that hold a lockless reference to the handle.
*/
int iommu_attach_group_handle(struct iommu_domain *domain,
struct iommu_group *group,
struct iommu_attach_handle *handle)
{
+ void *entry;
int ret;
- if (handle)
- handle->domain = domain;
+ if (!handle)
+ return -EINVAL;
mutex_lock(&group->mutex);
- ret = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
+ entry = iommu_make_pasid_array_entry(domain, handle);
+ ret = xa_insert(&group->pasid_array,
+ IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL);
if (ret)
- goto err_unlock;
+ goto out_unlock;
ret = __iommu_attach_group(domain, group);
- if (ret)
- goto err_erase;
- mutex_unlock(&group->mutex);
+ if (ret) {
+ xa_release(&group->pasid_array, IOMMU_NO_PASID);
+ goto out_unlock;
+ }
- return 0;
-err_erase:
- xa_erase(&group->pasid_array, IOMMU_NO_PASID);
-err_unlock:
+ /*
+ * The xa_insert() above reserved the memory, and the group->mutex is
+ * held, this cannot fail. The new domain cannot be visible until the
+ * operation succeeds as we cannot tolerate PRIs becoming concurrently
+ * queued and then failing attach.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ IOMMU_NO_PASID, entry, GFP_KERNEL)));
+
+out_unlock:
mutex_unlock(&group->mutex);
return ret;
}
@@ -3557,33 +3729,37 @@ EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL");
* @new_domain: new IOMMU domain to replace with
* @handle: attach handle
*
- * This is a variant of iommu_group_replace_domain(). It allows the caller to
- * provide an attach handle for the new domain and use it when the domain is
- * attached.
+ * This API allows the group to switch domains without being forced to go to
+ * the blocking domain in-between. It allows the caller to provide an attach
+ * handle for the new domain and use it when the domain is attached.
+ *
+ * If the currently attached domain is a core domain (e.g. a default_domain),
+ * it will act just like the iommu_attach_group_handle().
+ *
+ * The caller should always provide a new handle to avoid racing with paths
+ * that hold a lockless reference to the handle.
*/
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle)
{
- void *curr;
+ void *curr, *entry;
int ret;
- if (!new_domain)
+ if (!new_domain || !handle)
return -EINVAL;
mutex_lock(&group->mutex);
- if (handle) {
- ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
- if (ret)
- goto err_unlock;
- handle->domain = new_domain;
- }
+ entry = iommu_make_pasid_array_entry(new_domain, handle);
+ ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
+ if (ret)
+ goto err_unlock;
ret = __iommu_group_set_domain(group, new_domain);
if (ret)
goto err_release;
- curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
+ curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL);
WARN_ON(xa_is_err(curr));
mutex_unlock(&group->mutex);
@@ -3596,3 +3772,45 @@ err_unlock:
return ret;
}
EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
+
+#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
+/**
+ * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
+ * @desc: MSI descriptor, will store the MSI page
+ * @msi_addr: MSI target address to be mapped
+ *
+ * The implementation of sw_msi() should take msi_addr and map it to
+ * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the
+ * mapping information.
+ *
+ * Return: 0 on success or negative error code if the mapping failed.
+ */
+int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct iommu_group *group = dev->iommu_group;
+ int ret = 0;
+
+ if (!group)
+ return 0;
+
+ mutex_lock(&group->mutex);
+ /* An IDENTITY domain must pass through */
+ if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) {
+ switch (group->domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_MSI:
+ case IOMMU_COOKIE_DMA_IOVA:
+ ret = iommu_dma_sw_msi(group->domain, desc, msi_addr);
+ break;
+ case IOMMU_COOKIE_IOMMUFD:
+ ret = iommufd_sw_msi(group->domain, desc, msi_addr);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ }
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+#endif /* CONFIG_IRQ_MSI_IOMMU */
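
A minimal sketch (not part of the patch; the domains, handles and the wrapper name are placeholders) of the handle contract documented above, mirroring how the iommufd code further below uses these calls: a PASID is attached with one handle and replaced with a freshly allocated handle rather than the old one, since other paths may hold a lockless reference to the handle stored in group->pasid_array.

static int my_switch_pasid_domain(struct device *dev, ioasid_t pasid,
				  struct iommu_domain *dom_a,
				  struct iommu_domain *dom_b,
				  struct iommu_attach_handle *handle_a,
				  struct iommu_attach_handle *handle_b)
{
	int ret;

	ret = iommu_attach_device_pasid(dom_a, dev, pasid, handle_a);
	if (ret)
		return ret;

	/* Replacement must pass a new handle; reusing handle_a is rejected */
	ret = iommu_replace_device_pasid(dom_b, dev, pasid, handle_b);
	if (ret) {
		/* On failure the pasid keeps dom_a and handle_a */
		iommu_detach_device_pasid(dom_a, dev, pasid);
		return ret;
	}

	iommu_detach_device_pasid(dom_b, dev, pasid);
	return 0;
}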
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 0a07f9449fd9..2beeb4f60ee5 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config IOMMUFD_DRIVER_CORE
- tristate
+ bool
default (IOMMUFD_DRIVER || IOMMUFD) if IOMMUFD!=n
config IOMMUFD
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index cb784da6cddc..71d692c9a8f4 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
iommufd-y := \
device.o \
- fault.o \
+ eventq.o \
hw_pagetable.o \
io_pagetable.o \
ioas.o \
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index dfd0898fb6c1..2111bad72c72 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -3,6 +3,7 @@
*/
#include <linux/iommu.h>
#include <linux/iommufd.h>
+#include <linux/pci-ats.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>
@@ -17,12 +18,17 @@ MODULE_PARM_DESC(
"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
"the MSI interrupt window. Enabling this is a security weakness.");
+struct iommufd_attach {
+ struct iommufd_hw_pagetable *hwpt;
+ struct xarray device_array;
+};
+
static void iommufd_group_release(struct kref *kref)
{
struct iommufd_group *igroup =
container_of(kref, struct iommufd_group, ref);
- WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));
+ WARN_ON(!xa_empty(&igroup->pasid_attach));
xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
NULL, GFP_KERNEL);
@@ -89,7 +95,7 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
kref_init(&new_igroup->ref);
mutex_init(&new_igroup->lock);
- INIT_LIST_HEAD(&new_igroup->device_list);
+ xa_init(&new_igroup->pasid_attach);
new_igroup->sw_msi_start = PHYS_ADDR_MAX;
/* group reference moves into new_igroup */
new_igroup->group = group;
@@ -293,56 +299,83 @@ u32 iommufd_device_to_id(struct iommufd_device *idev)
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");
+static unsigned int iommufd_group_device_num(struct iommufd_group *igroup,
+ ioasid_t pasid)
+{
+ struct iommufd_attach *attach;
+ struct iommufd_device *idev;
+ unsigned int count = 0;
+ unsigned long index;
+
+ lockdep_assert_held(&igroup->lock);
+
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (attach)
+ xa_for_each(&attach->device_array, index, idev)
+ count++;
+ return count;
+}
+
+#ifdef CONFIG_IRQ_MSI_IOMMU
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
- phys_addr_t sw_msi_start = igroup->sw_msi_start;
- int rc;
+ struct iommufd_ctx *ictx = igroup->ictx;
+ struct iommufd_sw_msi_map *cur;
+
+ if (igroup->sw_msi_start == PHYS_ADDR_MAX)
+ return 0;
/*
- * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to
- * call iommu_get_msi_cookie() on its behalf. This is necessary to setup
- * the MSI window so iommu_dma_prepare_msi() can install pages into our
- * domain after request_irq(). If it is not done interrupts will not
- * work on this domain.
- *
- * FIXME: This is conceptually broken for iommufd since we want to allow
- * userspace to change the domains, eg switch from an identity IOAS to a
- * DMA IOAS. There is currently no way to create a MSI window that
- * matches what the IRQ layer actually expects in a newly created
- * domain.
+ * Install all the MSI pages the device has been using into the domain
*/
- if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
- rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
- sw_msi_start);
+ guard(mutex)(&ictx->sw_msi_lock);
+ list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
+ int rc;
+
+ if (cur->sw_msi_start != igroup->sw_msi_start ||
+ !test_bit(cur->id, igroup->required_sw_msi.bitmap))
+ continue;
+
+ rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur);
if (rc)
return rc;
-
- /*
- * iommu_get_msi_cookie() can only be called once per domain,
- * it returns -EBUSY on later calls.
- */
- hwpt_paging->msi_cookie = true;
}
return 0;
}
+#else
+static inline int
+iommufd_group_setup_msi(struct iommufd_group *igroup,
+ struct iommufd_hwpt_paging *hwpt_paging)
+{
+ return 0;
+}
+#endif
+
+static bool
+iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid)
+{
+ lockdep_assert_held(&igroup->lock);
+ return !xa_load(&igroup->pasid_attach, pasid);
+}
static int
iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
struct iommufd_hwpt_paging *hwpt_paging)
{
+ struct iommufd_group *igroup = idev->igroup;
int rc;
- lockdep_assert_held(&idev->igroup->lock);
+ lockdep_assert_held(&igroup->lock);
rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
idev->dev,
- &idev->igroup->sw_msi_start);
+ &igroup->sw_msi_start);
if (rc)
return rc;
- if (list_empty(&idev->igroup->device_list)) {
- rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
+ if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) {
+ rc = iommufd_group_setup_msi(igroup, hwpt_paging);
if (rc) {
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
idev->dev);
@@ -352,23 +385,217 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
return 0;
}
+/* The device attach/detach/replace helpers for attach_handle */
+
+static bool iommufd_device_is_attached(struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach *attach;
+
+ attach = xa_load(&idev->igroup->pasid_attach, pasid);
+ return xa_load(&attach->device_array, idev->obj.id);
+}
+
+static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_group *igroup = idev->igroup;
+
+ lockdep_assert_held(&igroup->lock);
+
+ if (pasid == IOMMU_NO_PASID) {
+ unsigned long start = IOMMU_NO_PASID;
+
+ if (!hwpt->pasid_compat &&
+ xa_find_after(&igroup->pasid_attach,
+ &start, UINT_MAX, XA_PRESENT))
+ return -EINVAL;
+ } else {
+ struct iommufd_attach *attach;
+
+ if (!hwpt->pasid_compat)
+ return -EINVAL;
+
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ if (attach && attach->hwpt && !attach->hwpt->pasid_compat)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach_handle *handle;
+ int rc;
+
+ rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
+ if (rc)
+ return rc;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ if (hwpt->fault) {
+ rc = iommufd_fault_iopf_enable(idev);
+ if (rc)
+ goto out_free_handle;
+ }
+
+ handle->idev = idev;
+ if (pasid == IOMMU_NO_PASID)
+ rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
+ &handle->handle);
+ else
+ rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid,
+ &handle->handle);
+ if (rc)
+ goto out_disable_iopf;
+
+ return 0;
+
+out_disable_iopf:
+ if (hwpt->fault)
+ iommufd_fault_iopf_disable(idev);
+out_free_handle:
+ kfree(handle);
+ return rc;
+}
+
+static struct iommufd_attach_handle *
+iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
+{
+ struct iommu_attach_handle *handle;
+
+ lockdep_assert_held(&idev->igroup->lock);
+
+ handle =
+ iommu_attach_handle_get(idev->igroup->group, pasid, 0);
+ if (IS_ERR(handle))
+ return NULL;
+ return to_iommufd_handle(handle);
+}
+
+static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach_handle *handle;
+
+ handle = iommufd_device_get_attach_handle(idev, pasid);
+ if (pasid == IOMMU_NO_PASID)
+ iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
+ else
+ iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid);
+
+ if (hwpt->fault) {
+ iommufd_auto_response_faults(hwpt, handle);
+ iommufd_fault_iopf_disable(idev);
+ }
+ kfree(handle);
+}
+
+static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
+ ioasid_t pasid,
+ struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_hw_pagetable *old)
+{
+ struct iommufd_attach_handle *handle, *old_handle;
+ int rc;
+
+ rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
+ if (rc)
+ return rc;
+
+ old_handle = iommufd_device_get_attach_handle(idev, pasid);
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ if (hwpt->fault && !old->fault) {
+ rc = iommufd_fault_iopf_enable(idev);
+ if (rc)
+ goto out_free_handle;
+ }
+
+ handle->idev = idev;
+ if (pasid == IOMMU_NO_PASID)
+ rc = iommu_replace_group_handle(idev->igroup->group,
+ hwpt->domain, &handle->handle);
+ else
+ rc = iommu_replace_device_pasid(hwpt->domain, idev->dev,
+ pasid, &handle->handle);
+ if (rc)
+ goto out_disable_iopf;
+
+ if (old->fault) {
+ iommufd_auto_response_faults(hwpt, old_handle);
+ if (!hwpt->fault)
+ iommufd_fault_iopf_disable(idev);
+ }
+ kfree(old_handle);
+
+ return 0;
+
+out_disable_iopf:
+ if (hwpt->fault && !old->fault)
+ iommufd_fault_iopf_disable(idev);
+out_free_handle:
+ kfree(handle);
+ return rc;
+}
+
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
+ struct iommufd_device *idev, ioasid_t pasid)
{
struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
+ struct iommufd_group *igroup = idev->igroup;
+ struct iommufd_hw_pagetable *old_hwpt;
+ struct iommufd_attach *attach;
int rc;
- mutex_lock(&idev->igroup->lock);
+ mutex_lock(&igroup->lock);
- if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
- rc = -EINVAL;
+ attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL,
+ XA_ZERO_ENTRY, GFP_KERNEL);
+ if (xa_is_err(attach)) {
+ rc = xa_err(attach);
goto err_unlock;
}
- if (hwpt_paging) {
+ if (!attach) {
+ attach = kzalloc(sizeof(*attach), GFP_KERNEL);
+ if (!attach) {
+ rc = -ENOMEM;
+ goto err_release_pasid;
+ }
+ xa_init(&attach->device_array);
+ }
+
+ old_hwpt = attach->hwpt;
+
+ rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ if (rc) {
+ WARN_ON(rc == -EBUSY && !old_hwpt);
+ goto err_free_attach;
+ }
+
+ if (old_hwpt && old_hwpt != hwpt) {
+ rc = -EINVAL;
+ goto err_release_devid;
+ }
+
+ if (attach_resv) {
rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
if (rc)
- goto err_unlock;
+ goto err_release_devid;
}
/*
@@ -378,51 +605,74 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
* reserved regions are only updated during individual device
* attachment.
*/
- if (list_empty(&idev->igroup->device_list)) {
- rc = iommufd_hwpt_attach_device(hwpt, idev);
+ if (iommufd_group_first_attach(igroup, pasid)) {
+ rc = iommufd_hwpt_attach_device(hwpt, idev, pasid);
if (rc)
goto err_unresv;
- idev->igroup->hwpt = hwpt;
+ attach->hwpt = hwpt;
+ WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach,
+ GFP_KERNEL)));
}
refcount_inc(&hwpt->obj.users);
- list_add_tail(&idev->group_item, &idev->igroup->device_list);
- mutex_unlock(&idev->igroup->lock);
+ WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id,
+ idev, GFP_KERNEL)));
+ mutex_unlock(&igroup->lock);
return 0;
err_unresv:
- if (hwpt_paging)
+ if (attach_resv)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
+err_release_devid:
+ xa_release(&attach->device_array, idev->obj.id);
+err_free_attach:
+ if (iommufd_group_first_attach(igroup, pasid))
+ kfree(attach);
+err_release_pasid:
+ if (iommufd_group_first_attach(igroup, pasid))
+ xa_release(&igroup->pasid_attach, pasid);
err_unlock:
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
return rc;
}
struct iommufd_hw_pagetable *
-iommufd_hw_pagetable_detach(struct iommufd_device *idev)
+iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid)
{
- struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
- struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ struct iommufd_group *igroup = idev->igroup;
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_attach *attach;
- mutex_lock(&idev->igroup->lock);
- list_del(&idev->group_item);
- if (list_empty(&idev->igroup->device_list)) {
- iommufd_hwpt_detach_device(hwpt, idev);
- idev->igroup->hwpt = NULL;
+ mutex_lock(&igroup->lock);
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (!attach) {
+ mutex_unlock(&igroup->lock);
+ return NULL;
}
- if (hwpt_paging)
+
+ hwpt = attach->hwpt;
+ hwpt_paging = find_hwpt_paging(hwpt);
+
+ xa_erase(&attach->device_array, idev->obj.id);
+ if (xa_empty(&attach->device_array)) {
+ iommufd_hwpt_detach_device(hwpt, idev, pasid);
+ xa_erase(&igroup->pasid_attach, pasid);
+ kfree(attach);
+ }
+ if (hwpt_paging && pasid == IOMMU_NO_PASID)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
/* Caller must destroy hwpt */
return hwpt;
}
static struct iommufd_hw_pagetable *
-iommufd_device_do_attach(struct iommufd_device *idev,
+iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_hw_pagetable *hwpt)
{
int rc;
- rc = iommufd_hw_pagetable_attach(hwpt, idev);
+ rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
if (rc)
return ERR_PTR(rc);
return NULL;
@@ -432,11 +682,14 @@ static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
+ struct iommufd_attach *attach;
struct iommufd_device *cur;
+ unsigned long index;
lockdep_assert_held(&igroup->lock);
- list_for_each_entry(cur, &igroup->device_list, group_item)
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ xa_for_each(&attach->device_array, index, cur)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
}
@@ -445,14 +698,17 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
struct iommufd_hwpt_paging *old_hwpt_paging;
+ struct iommufd_attach *attach;
struct iommufd_device *cur;
+ unsigned long index;
int rc;
lockdep_assert_held(&igroup->lock);
- old_hwpt_paging = find_hwpt_paging(igroup->hwpt);
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ old_hwpt_paging = find_hwpt_paging(attach->hwpt);
if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
- list_for_each_entry(cur, &igroup->device_list, group_item) {
+ xa_for_each(&attach->device_array, index, cur) {
rc = iopt_table_enforce_dev_resv_regions(
&hwpt_paging->ioas->iopt, cur->dev, NULL);
if (rc)
@@ -471,69 +727,81 @@ err_unresv:
}
static struct iommufd_hw_pagetable *
-iommufd_device_do_replace(struct iommufd_device *idev,
+iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_hw_pagetable *hwpt)
{
struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
struct iommufd_hwpt_paging *old_hwpt_paging;
struct iommufd_group *igroup = idev->igroup;
struct iommufd_hw_pagetable *old_hwpt;
+ struct iommufd_attach *attach;
unsigned int num_devices;
int rc;
- mutex_lock(&idev->igroup->lock);
+ mutex_lock(&igroup->lock);
- if (igroup->hwpt == NULL) {
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (!attach) {
rc = -EINVAL;
goto err_unlock;
}
- if (hwpt == igroup->hwpt) {
- mutex_unlock(&idev->igroup->lock);
+ old_hwpt = attach->hwpt;
+
+ WARN_ON(!old_hwpt || xa_empty(&attach->device_array));
+
+ if (!iommufd_device_is_attached(idev, pasid)) {
+ rc = -EINVAL;
+ goto err_unlock;
+ }
+
+ if (hwpt == old_hwpt) {
+ mutex_unlock(&igroup->lock);
return NULL;
}
- old_hwpt = igroup->hwpt;
- if (hwpt_paging) {
+ if (attach_resv) {
rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
if (rc)
goto err_unlock;
}
- rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
+ rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt);
if (rc)
goto err_unresv;
old_hwpt_paging = find_hwpt_paging(old_hwpt);
- if (old_hwpt_paging &&
+ if (old_hwpt_paging && pasid == IOMMU_NO_PASID &&
(!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);
- igroup->hwpt = hwpt;
+ attach->hwpt = hwpt;
- num_devices = list_count_nodes(&igroup->device_list);
+ num_devices = iommufd_group_device_num(igroup, pasid);
/*
- * Move the refcounts held by the device_list to the new hwpt. Retain a
+ * Move the refcounts held by the device_array to the new hwpt. Retain a
* refcount for this thread as the caller will free it.
*/
refcount_add(num_devices, &hwpt->obj.users);
if (num_devices > 1)
WARN_ON(refcount_sub_and_test(num_devices - 1,
&old_hwpt->obj.users));
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
/* Caller must destroy old_hwpt */
return old_hwpt;
err_unresv:
- if (hwpt_paging)
+ if (attach_resv)
iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
err_unlock:
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
return ERR_PTR(rc);
}
typedef struct iommufd_hw_pagetable *(*attach_fn)(
- struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
+ struct iommufd_device *idev, ioasid_t pasid,
+ struct iommufd_hw_pagetable *hwpt);
/*
* When automatically managing the domains we search for a compatible domain in
@@ -541,7 +809,7 @@ typedef struct iommufd_hw_pagetable *(*attach_fn)(
* Automatic domain selection will never pick a manually created domain.
*/
static struct iommufd_hw_pagetable *
-iommufd_device_auto_get_domain(struct iommufd_device *idev,
+iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_ioas *ioas, u32 *pt_id,
attach_fn do_attach)
{
@@ -570,7 +838,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
hwpt = &hwpt_paging->common;
if (!iommufd_lock_obj(&hwpt->obj))
continue;
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt)) {
iommufd_put_object(idev->ictx, &hwpt->obj);
/*
@@ -588,8 +856,8 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
goto out_unlock;
}
- hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
- immediate_attach, NULL);
+ hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid,
+ 0, immediate_attach, NULL);
if (IS_ERR(hwpt_paging)) {
destroy_hwpt = ERR_CAST(hwpt_paging);
goto out_unlock;
@@ -597,7 +865,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
hwpt = &hwpt_paging->common;
if (!immediate_attach) {
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt))
goto out_abort;
} else {
@@ -618,8 +886,9 @@ out_unlock:
return destroy_hwpt;
}
-static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
- attach_fn do_attach)
+static int iommufd_device_change_pt(struct iommufd_device *idev,
+ ioasid_t pasid,
+ u32 *pt_id, attach_fn do_attach)
{
struct iommufd_hw_pagetable *destroy_hwpt;
struct iommufd_object *pt_obj;
@@ -634,7 +903,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
struct iommufd_hw_pagetable *hwpt =
container_of(pt_obj, struct iommufd_hw_pagetable, obj);
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
@@ -643,8 +912,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
struct iommufd_ioas *ioas =
container_of(pt_obj, struct iommufd_ioas, obj);
- destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
- do_attach);
+ destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid, ioas,
+ pt_id, do_attach);
if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
@@ -666,22 +935,26 @@ out_put_pt_obj:
}
/**
- * iommufd_device_attach - Connect a device to an iommu_domain
+ * iommufd_device_attach - Connect a device/pasid to an iommu_domain
* @idev: device to attach
+ * @pasid: pasid to attach
* @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
* Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
- * This connects the device to an iommu_domain, either automatically or manually
- * selected. Once this completes the device could do DMA.
+ * This connects the device/pasid to an iommu_domain, either automatically
+ * or manually selected. Once this completes the device could do DMA with
+ * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage.
*
* The caller should return the resulting pt_id back to userspace.
* This function is undone by calling iommufd_device_detach().
*/
-int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id)
{
int rc;
- rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
+ rc = iommufd_device_change_pt(idev, pasid, pt_id,
+ &iommufd_device_do_attach);
if (rc)
return rc;
@@ -695,8 +968,9 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
/**
- * iommufd_device_replace - Change the device's iommu_domain
+ * iommufd_device_replace - Change the device/pasid's iommu_domain
* @idev: device to change
+ * @pasid: pasid to change
* @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
* Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
@@ -707,27 +981,33 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
*
* If it fails then no change is made to the attachment. The iommu driver may
* implement this so there is no disruption in translation. This can only be
- * called if iommufd_device_attach() has already succeeded.
+ * called if iommufd_device_attach() has already succeeded. @pasid is
+ * IOMMU_NO_PASID for no pasid usage.
*/
-int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
+int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id)
{
- return iommufd_device_change_pt(idev, pt_id,
+ return iommufd_device_change_pt(idev, pasid, pt_id,
&iommufd_device_do_replace);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD");
/**
- * iommufd_device_detach - Disconnect a device to an iommu_domain
+ * iommufd_device_detach - Disconnect a device/pasid from an iommu_domain
* @idev: device to detach
+ * @pasid: pasid to detach
*
* Undo iommufd_device_attach(). This disconnects the idev from the previously
* attached pt_id. The device returns back to a blocked DMA translation.
+ * @pasid is IOMMU_NO_PASID for no pasid usage.
*/
-void iommufd_device_detach(struct iommufd_device *idev)
+void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid)
{
struct iommufd_hw_pagetable *hwpt;
- hwpt = iommufd_hw_pagetable_detach(idev);
+ hwpt = iommufd_hw_pagetable_detach(idev, pasid);
+ if (!hwpt)
+ return;
iommufd_hw_pagetable_put(idev->ictx, hwpt);
refcount_dec(&idev->obj.users);
}
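The three exported entry points above now take an explicit pasid everywhere. A minimal sketch of how a consumer driver might use them together, assuming it already holds a bound struct iommufd_device; the helper name and parameters are illustrative and not part of this patch:

#include <linux/iommu.h>
#include <linux/iommufd.h>

/* Hypothetical consumer helper: route DMA tagged with @pasid of @idev. */
static int example_route_pasid(struct iommufd_device *idev, ioasid_t pasid,
			       u32 first_pt, u32 second_pt)
{
	int rc;

	/* in: an IOAS or HWPT_PAGING id; attach may rewrite it to a HWPT id */
	rc = iommufd_device_attach(idev, pasid, &first_pt);
	if (rc)
		return rc;

	/* switch translation without detaching; the old attach survives failure */
	rc = iommufd_device_replace(idev, pasid, &second_pt);
	if (rc) {
		iommufd_device_detach(idev, pasid);
		return rc;
	}

	/* ... DMA tagged with @pasid now translates through second_pt ... */

	iommufd_device_detach(idev, pasid);
	return 0;
}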
@@ -1127,7 +1407,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
struct io_pagetable *iopt;
struct iopt_area *area;
unsigned long last_iova;
- int rc;
+ int rc = -EINVAL;
if (!length)
return -EINVAL;
@@ -1183,7 +1463,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
void *data;
int rc;
- if (cmd->flags || cmd->__reserved)
+ if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
+ cmd->__reserved[2])
return -EOPNOTSUPP;
idev = iommufd_get_device(ucmd, cmd->dev_id);
@@ -1240,6 +1521,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
+ cmd->out_max_pasid_log2 = 0;
+ /*
+ * Currently, all iommu drivers enable PASID in the probe_device()
+ * op if the iommu and device support it, so the max_pasids stored in
+ * dev->iommu indicates both PASID support and enable status. A
+ * non-zero dev->iommu->max_pasids means PASID is supported and
+ * enabled. iommufd only reports the PASID capability to userspace
+ * if it's enabled.
+ */
+ if (idev->dev->iommu->max_pasids) {
+ cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);
+
+ if (dev_is_pci(idev->dev)) {
+ struct pci_dev *pdev = to_pci_dev(idev->dev);
+ int ctrl;
+
+ ctrl = pci_pasid_status(pdev);
+
+ WARN_ON_ONCE(ctrl < 0 ||
+ !(ctrl & PCI_PASID_CTRL_ENABLE));
+
+ if (ctrl & PCI_PASID_CTRL_EXEC)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_EXEC;
+ if (ctrl & PCI_PASID_CTRL_PRIV)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_PRIV;
+ }
+ }
+
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
kfree(data);
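For userspace, the PASID width and the PCI EXEC/PRIV bits surface through IOMMU_GET_HW_INFO. A minimal sketch of the query, assuming an open /dev/iommu fd, an already bound device id, and the struct iommu_hw_info layout from the installed uAPI header; only the fields touched in the hunk above are relied on, and error handling is trimmed:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

/* Query the PASID width and PCI PASID execute/privilege bits for @dev_id. */
static int example_query_pasid_caps(int iommufd, __u32 dev_id)
{
	struct iommu_hw_info info = {
		.size = sizeof(info),
		.dev_id = dev_id,
	};

	if (ioctl(iommufd, IOMMU_GET_HW_INFO, &info))
		return -1;

	if (!info.out_max_pasid_log2) {
		printf("PASID not enabled for device %u\n", dev_id);
		return 0;
	}
	printf("max PASIDs: 2^%d, exec=%d priv=%d\n",
	       info.out_max_pasid_log2,
	       !!(info.out_capabilities & IOMMU_HW_CAP_PCI_PASID_EXEC),
	       !!(info.out_capabilities & IOMMU_HW_CAP_PCI_PASID_PRIV));
	return 0;
}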
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 2d98b04ff1cb..922cd1fe7ec2 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -49,5 +49,203 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
}
EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD");
+/* Return -ENOENT if device is not associated to the vIOMMU */
+int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
+ struct device *dev, unsigned long *vdev_id)
+{
+ struct iommufd_vdevice *vdev;
+ unsigned long index;
+ int rc = -ENOENT;
+
+ if (WARN_ON_ONCE(!vdev_id))
+ return -EINVAL;
+
+ xa_lock(&viommu->vdevs);
+ xa_for_each(&viommu->vdevs, index, vdev) {
+ if (vdev->dev == dev) {
+ *vdev_id = vdev->id;
+ rc = 0;
+ break;
+ }
+ }
+ xa_unlock(&viommu->vdevs);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD");
+
+/*
+ * Typically called from a driver's threaded IRQ handler.
+ * The @type and @event_data must be defined in include/uapi/linux/iommufd.h
+ */
+int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
+ enum iommu_veventq_type type, void *event_data,
+ size_t data_len)
+{
+ struct iommufd_veventq *veventq;
+ struct iommufd_vevent *vevent;
+ int rc = 0;
+
+ if (WARN_ON_ONCE(!data_len || !event_data))
+ return -EINVAL;
+
+ down_read(&viommu->veventqs_rwsem);
+
+ veventq = iommufd_viommu_find_veventq(viommu, type);
+ if (!veventq) {
+ rc = -EOPNOTSUPP;
+ goto out_unlock_veventqs;
+ }
+
+ spin_lock(&veventq->common.lock);
+ if (veventq->num_events == veventq->depth) {
+ vevent = &veventq->lost_events_header;
+ goto out_set_header;
+ }
+
+ vevent = kzalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC);
+ if (!vevent) {
+ rc = -ENOMEM;
+ vevent = &veventq->lost_events_header;
+ goto out_set_header;
+ }
+ memcpy(vevent->event_data, event_data, data_len);
+ vevent->data_len = data_len;
+ veventq->num_events++;
+
+out_set_header:
+ iommufd_vevent_handler(veventq, vevent);
+ spin_unlock(&veventq->common.lock);
+out_unlock_veventqs:
+ up_read(&viommu->veventqs_rwsem);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_viommu_report_event, "IOMMUFD");
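iommufd_viommu_report_event() is the only call an IOMMU driver needs in its (typically threaded) IRQ path; queue depth accounting, the lost-events marker and reader wakeup all happen above. A minimal sketch of such a call site; the payload struct and the EXAMPLE_VEVENTQ_TYPE constant are hypothetical stand-ins for types defined in include/uapi/linux/iommufd.h:

#include <linux/device.h>
#include <linux/iommufd.h>

/* Hypothetical payload; real ones are defined in include/uapi/linux/iommufd.h. */
struct example_vevent {
	__u32 vdev_id;
	__u32 reason;
};

/* Stand-in for a real enum iommu_veventq_type value matching the payload. */
#define EXAMPLE_VEVENTQ_TYPE ((enum iommu_veventq_type)1)

static void example_forward_hw_event(struct iommufd_viommu *viommu,
				     struct device *dev, u32 reason)
{
	struct example_vevent evt = { .reason = reason };
	unsigned long vdev_id;

	/* Only forward events for devices that have a vDEVICE in this vIOMMU. */
	if (iommufd_viommu_get_vdev_id(viommu, dev, &vdev_id))
		return;
	evt.vdev_id = vdev_id;

	/*
	 * -EOPNOTSUPP simply means userspace has not allocated a vEVENTQ of
	 * this type; the event is dropped in that case.
	 */
	iommufd_viommu_report_event(viommu, EXAMPLE_VEVENTQ_TYPE, &evt,
				    sizeof(evt));
}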
+
+#ifdef CONFIG_IRQ_MSI_IOMMU
+/*
+ * Get an iommufd_sw_msi_map for the MSI physical address requested by the irq
+ * layer. The mapping to IOVA is global to the iommufd file descriptor; every
+ * domain that is attached to a device using the same MSI parameters will use
+ * the same IOVA.
+ */
+static struct iommufd_sw_msi_map *
+iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr,
+ phys_addr_t sw_msi_start)
+{
+ struct iommufd_sw_msi_map *cur;
+ unsigned int max_pgoff = 0;
+
+ lockdep_assert_held(&ictx->sw_msi_lock);
+
+ list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
+ if (cur->sw_msi_start != sw_msi_start)
+ continue;
+ max_pgoff = max(max_pgoff, cur->pgoff + 1);
+ if (cur->msi_addr == msi_addr)
+ return cur;
+ }
+
+ if (ictx->sw_msi_id >=
+ BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap))
+ return ERR_PTR(-EOVERFLOW);
+
+ cur = kzalloc(sizeof(*cur), GFP_KERNEL);
+ if (!cur)
+ return ERR_PTR(-ENOMEM);
+
+ cur->sw_msi_start = sw_msi_start;
+ cur->msi_addr = msi_addr;
+ cur->pgoff = max_pgoff;
+ cur->id = ictx->sw_msi_id++;
+ list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list);
+ return cur;
+}
+
+int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
+ struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_sw_msi_map *msi_map)
+{
+ unsigned long iova;
+
+ lockdep_assert_held(&ictx->sw_msi_lock);
+
+ iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+ if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) {
+ int rc;
+
+ rc = iommu_map(hwpt_paging->common.domain, iova,
+ msi_map->msi_addr, PAGE_SIZE,
+ IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO,
+ GFP_KERNEL_ACCOUNT);
+ if (rc)
+ return rc;
+ __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi_install, "IOMMUFD_INTERNAL");
+
+/*
+ * Called by the irq code if the platform translates the MSI address through the
+ * IOMMU. msi_addr is the physical address of the MSI page. iommufd will
+ * allocate an fd-global IOVA for the physical page that is the same on all
+ * domains and devices.
+ */
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommu_attach_handle *raw_handle;
+ struct iommufd_attach_handle *handle;
+ struct iommufd_sw_msi_map *msi_map;
+ struct iommufd_ctx *ictx;
+ unsigned long iova;
+ int rc;
+
+ /*
+ * It is safe to call iommu_attach_handle_get() here because the iommu
+ * core code invokes this under the group mutex which also prevents any
+ * change of the attach handle for the duration of this function.
+ */
+ iommu_group_mutex_assert(dev);
+
+ raw_handle =
+ iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
+ if (IS_ERR(raw_handle))
+ return 0;
+ hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt);
+
+ handle = to_iommufd_handle(raw_handle);
+ /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */
+ if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX)
+ return 0;
+
+ ictx = handle->idev->ictx;
+ guard(mutex)(&ictx->sw_msi_lock);
+ /*
+ * The input msi_addr is the exact byte offset of the MSI doorbell; we
+ * assume the caller has checked that it is contained within an MMIO
+ * region that is safe to map at PAGE_SIZE.
+ */
+ msi_map = iommufd_sw_msi_get_map(handle->idev->ictx,
+ msi_addr & PAGE_MASK,
+ handle->idev->igroup->sw_msi_start);
+ if (IS_ERR(msi_map))
+ return PTR_ERR(msi_map);
+
+ rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map);
+ if (rc)
+ return rc;
+ __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap);
+
+ iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+ msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT);
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi, "IOMMUFD");
+#endif
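The net effect of the two helpers above is a stable per-context layout: each distinct doorbell page gets one page-sized slot inside the group's IOMMU_RESV_SW_MSI window, and every domain in the context maps that slot at the same IOVA, which is what keeps already-programmed MSI messages valid across a hwpt replace. The address arithmetic, as an illustrative one-liner whose helper name is not part of the patch:

#include <linux/mm.h>
#include <linux/types.h>

/* Illustrative only: IOVA used for the pgoff'th distinct doorbell page. */
static inline unsigned long example_sw_msi_iova(phys_addr_t sw_msi_start,
						unsigned int pgoff)
{
	return sw_msi_start + (unsigned long)pgoff * PAGE_SIZE;
}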
+
MODULE_DESCRIPTION("iommufd code shared with builtin modules");
+MODULE_IMPORT_NS("IOMMUFD_INTERNAL");
MODULE_LICENSE("GPL");
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c
new file mode 100644
index 000000000000..f39cf0797347
--- /dev/null
+++ b/drivers/iommu/iommufd/eventq.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2024 Intel Corporation
+ */
+#define pr_fmt(fmt) "iommufd: " fmt
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/iommufd.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+#include <linux/poll.h>
+#include <uapi/linux/iommufd.h>
+
+#include "../iommu-priv.h"
+#include "iommufd_private.h"
+
+/* IOMMUFD_OBJ_FAULT Functions */
+
+int iommufd_fault_iopf_enable(struct iommufd_device *idev)
+{
+ struct device *dev = idev->dev;
+ int ret;
+
+ /*
+ * Once we turn on PCI/PRI support for VF, the response failure code
+ * should not be forwarded to the hardware due to PRI being a shared
+ * resource between PF and VFs. There is no coordination for this
+ * shared capability. This waits for a vPRI reset to recover.
+ */
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (pdev->is_virtfn && pci_pri_supported(pdev))
+ return -EINVAL;
+ }
+
+ mutex_lock(&idev->iopf_lock);
+ /* Device iopf is already enabled. */
+ if (++idev->iopf_enabled > 1) {
+ mutex_unlock(&idev->iopf_lock);
+ return 0;
+ }
+
+ ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
+ if (ret)
+ --idev->iopf_enabled;
+ mutex_unlock(&idev->iopf_lock);
+
+ return ret;
+}
+
+void iommufd_fault_iopf_disable(struct iommufd_device *idev)
+{
+ mutex_lock(&idev->iopf_lock);
+ if (!WARN_ON(idev->iopf_enabled == 0)) {
+ if (--idev->iopf_enabled == 0)
+ iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
+ }
+ mutex_unlock(&idev->iopf_lock);
+}
+
+void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_attach_handle *handle)
+{
+ struct iommufd_fault *fault = hwpt->fault;
+ struct iopf_group *group, *next;
+ struct list_head free_list;
+ unsigned long index;
+
+ if (!fault)
+ return;
+ INIT_LIST_HEAD(&free_list);
+
+ mutex_lock(&fault->mutex);
+ spin_lock(&fault->common.lock);
+ list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
+ if (group->attach_handle != &handle->handle)
+ continue;
+ list_move(&group->node, &free_list);
+ }
+ spin_unlock(&fault->common.lock);
+
+ list_for_each_entry_safe(group, next, &free_list, node) {
+ list_del(&group->node);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+
+ xa_for_each(&fault->response, index, group) {
+ if (group->attach_handle != &handle->handle)
+ continue;
+ xa_erase(&fault->response, index);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ mutex_unlock(&fault->mutex);
+}
+
+void iommufd_fault_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_eventq *eventq =
+ container_of(obj, struct iommufd_eventq, obj);
+ struct iommufd_fault *fault = eventq_to_fault(eventq);
+ struct iopf_group *group, *next;
+ unsigned long index;
+
+ /*
+ * The iommufd object's reference count is zero at this point.
+ * We can be confident that no other threads are currently
+ * accessing this pointer. Therefore, acquiring the mutex here
+ * is unnecessary.
+ */
+ list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
+ list_del(&group->node);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ xa_for_each(&fault->response, index, group) {
+ xa_erase(&fault->response, index);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ xa_destroy(&fault->response);
+ mutex_destroy(&fault->mutex);
+}
+
+static void iommufd_compose_fault_message(struct iommu_fault *fault,
+ struct iommu_hwpt_pgfault *hwpt_fault,
+ struct iommufd_device *idev,
+ u32 cookie)
+{
+ hwpt_fault->flags = fault->prm.flags;
+ hwpt_fault->dev_id = idev->obj.id;
+ hwpt_fault->pasid = fault->prm.pasid;
+ hwpt_fault->grpid = fault->prm.grpid;
+ hwpt_fault->perm = fault->prm.perm;
+ hwpt_fault->addr = fault->prm.addr;
+ hwpt_fault->length = 0;
+ hwpt_fault->cookie = cookie;
+}
+
+/* Fetch the first node out of the fault->deliver list */
+static struct iopf_group *
+iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
+{
+ struct list_head *list = &fault->common.deliver;
+ struct iopf_group *group = NULL;
+
+ spin_lock(&fault->common.lock);
+ if (!list_empty(list)) {
+ group = list_first_entry(list, struct iopf_group, node);
+ list_del(&group->node);
+ }
+ spin_unlock(&fault->common.lock);
+ return group;
+}
+
+/* Restore a node back to the head of the fault->deliver list */
+static void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
+ struct iopf_group *group)
+{
+ spin_lock(&fault->common.lock);
+ list_add(&group->node, &fault->common.deliver);
+ spin_unlock(&fault->common.lock);
+}
+
+static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
+ struct iommufd_eventq *eventq = filep->private_data;
+ struct iommufd_fault *fault = eventq_to_fault(eventq);
+ struct iommu_hwpt_pgfault data = {};
+ struct iommufd_device *idev;
+ struct iopf_group *group;
+ struct iopf_fault *iopf;
+ size_t done = 0;
+ int rc = 0;
+
+ if (*ppos || count % fault_size)
+ return -ESPIPE;
+
+ mutex_lock(&fault->mutex);
+ while ((group = iommufd_fault_deliver_fetch(fault))) {
+ if (done >= count ||
+ group->fault_count * fault_size > count - done) {
+ iommufd_fault_deliver_restore(fault, group);
+ break;
+ }
+
+ rc = xa_alloc(&fault->response, &group->cookie, group,
+ xa_limit_32b, GFP_KERNEL);
+ if (rc) {
+ iommufd_fault_deliver_restore(fault, group);
+ break;
+ }
+
+ idev = to_iommufd_handle(group->attach_handle)->idev;
+ list_for_each_entry(iopf, &group->faults, list) {
+ iommufd_compose_fault_message(&iopf->fault,
+ &data, idev,
+ group->cookie);
+ if (copy_to_user(buf + done, &data, fault_size)) {
+ xa_erase(&fault->response, group->cookie);
+ iommufd_fault_deliver_restore(fault, group);
+ rc = -EFAULT;
+ break;
+ }
+ done += fault_size;
+ }
+ }
+ mutex_unlock(&fault->mutex);
+
+ return done == 0 ? rc : done;
+}
+
+static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ size_t response_size = sizeof(struct iommu_hwpt_page_response);
+ struct iommufd_eventq *eventq = filep->private_data;
+ struct iommufd_fault *fault = eventq_to_fault(eventq);
+ struct iommu_hwpt_page_response response;
+ struct iopf_group *group;
+ size_t done = 0;
+ int rc = 0;
+
+ if (*ppos || count % response_size)
+ return -ESPIPE;
+
+ mutex_lock(&fault->mutex);
+ while (count > done) {
+ rc = copy_from_user(&response, buf + done, response_size);
+ if (rc)
+ break;
+
+ static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
+ (int)IOMMU_PAGE_RESP_SUCCESS);
+ static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
+ (int)IOMMU_PAGE_RESP_INVALID);
+ if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
+ response.code != IOMMUFD_PAGE_RESP_INVALID) {
+ rc = -EINVAL;
+ break;
+ }
+
+ group = xa_erase(&fault->response, response.cookie);
+ if (!group) {
+ rc = -EINVAL;
+ break;
+ }
+
+ iopf_group_response(group, response.code);
+ iopf_free_group(group);
+ done += response_size;
+ }
+ mutex_unlock(&fault->mutex);
+
+ return done == 0 ? rc : done;
+}
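Seen from userspace, the fault fd is a record pipe: reads return whole struct iommu_hwpt_pgfault records and writes take whole struct iommu_hwpt_page_response records, matched by cookie. A minimal sketch of a handler that simply fails every fault back to the device, assuming the fd came from out_fault_fd of the fault allocation command; a real handler would resolve the fault and answer IOMMUFD_PAGE_RESP_SUCCESS instead:

#include <unistd.h>
#include <linux/iommufd.h>

/* Drain pending faults from @fault_fd, failing each one back to the device. */
static void example_reject_all_faults(int fault_fd)
{
	struct iommu_hwpt_pgfault fault;
	struct iommu_hwpt_page_response resp = {};

	/* Reads never return partial records; writes must be whole records. */
	while (read(fault_fd, &fault, sizeof(fault)) == (ssize_t)sizeof(fault)) {
		resp.cookie = fault.cookie;
		resp.code = IOMMUFD_PAGE_RESP_INVALID;
		if (write(fault_fd, &resp, sizeof(resp)) != (ssize_t)sizeof(resp))
			break;
	}
}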
+
+/* IOMMUFD_OBJ_VEVENTQ Functions */
+
+void iommufd_veventq_abort(struct iommufd_object *obj)
+{
+ struct iommufd_eventq *eventq =
+ container_of(obj, struct iommufd_eventq, obj);
+ struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
+ struct iommufd_viommu *viommu = veventq->viommu;
+ struct iommufd_vevent *cur, *next;
+
+ lockdep_assert_held_write(&viommu->veventqs_rwsem);
+
+ list_for_each_entry_safe(cur, next, &eventq->deliver, node) {
+ list_del(&cur->node);
+ if (cur != &veventq->lost_events_header)
+ kfree(cur);
+ }
+
+ refcount_dec(&viommu->obj.users);
+ list_del(&veventq->node);
+}
+
+void iommufd_veventq_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_veventq *veventq = eventq_to_veventq(
+ container_of(obj, struct iommufd_eventq, obj));
+
+ down_write(&veventq->viommu->veventqs_rwsem);
+ iommufd_veventq_abort(obj);
+ up_write(&veventq->viommu->veventqs_rwsem);
+}
+
+static struct iommufd_vevent *
+iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq)
+{
+ struct iommufd_eventq *eventq = &veventq->common;
+ struct list_head *list = &eventq->deliver;
+ struct iommufd_vevent *vevent = NULL;
+
+ spin_lock(&eventq->lock);
+ if (!list_empty(list)) {
+ struct iommufd_vevent *next;
+
+ next = list_first_entry(list, struct iommufd_vevent, node);
+ /* Make a copy of the lost_events_header for copy_to_user */
+ if (next == &veventq->lost_events_header) {
+ vevent = kzalloc(sizeof(*vevent), GFP_ATOMIC);
+ if (!vevent)
+ goto out_unlock;
+ }
+ list_del(&next->node);
+ if (vevent)
+ memcpy(vevent, next, sizeof(*vevent));
+ else
+ vevent = next;
+ }
+out_unlock:
+ spin_unlock(&eventq->lock);
+ return vevent;
+}
+
+static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq,
+ struct iommufd_vevent *vevent)
+{
+ struct iommufd_eventq *eventq = &veventq->common;
+ struct list_head *list = &eventq->deliver;
+
+ spin_lock(&eventq->lock);
+ if (vevent_for_lost_events_header(vevent)) {
+ /* Remove the copy of the lost_events_header */
+ kfree(vevent);
+ vevent = NULL;
+ /* An empty list needs the lost_events_header back */
+ if (list_empty(list))
+ vevent = &veventq->lost_events_header;
+ }
+ if (vevent)
+ list_add(&vevent->node, list);
+ spin_unlock(&eventq->lock);
+}
+
+static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct iommufd_eventq *eventq = filep->private_data;
+ struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
+ struct iommufd_vevent_header *hdr;
+ struct iommufd_vevent *cur;
+ size_t done = 0;
+ int rc = 0;
+
+ if (*ppos)
+ return -ESPIPE;
+
+ while ((cur = iommufd_veventq_deliver_fetch(veventq))) {
+ /* Validate the remaining bytes against the header size */
+ if (done >= count || sizeof(*hdr) > count - done) {
+ iommufd_veventq_deliver_restore(veventq, cur);
+ break;
+ }
+ hdr = &cur->header;
+
+ /* For a normal vEVENT, validate against the full size */
+ if (!vevent_for_lost_events_header(cur) &&
+ sizeof(*hdr) + cur->data_len > count - done) {
+ iommufd_veventq_deliver_restore(veventq, cur);
+ break;
+ }
+
+ if (copy_to_user(buf + done, hdr, sizeof(*hdr))) {
+ iommufd_veventq_deliver_restore(veventq, cur);
+ rc = -EFAULT;
+ break;
+ }
+ done += sizeof(*hdr);
+
+ if (cur->data_len &&
+ copy_to_user(buf + done, cur->event_data, cur->data_len)) {
+ iommufd_veventq_deliver_restore(veventq, cur);
+ rc = -EFAULT;
+ break;
+ }
+ spin_lock(&eventq->lock);
+ if (!vevent_for_lost_events_header(cur))
+ veventq->num_events--;
+ spin_unlock(&eventq->lock);
+ done += cur->data_len;
+ kfree(cur);
+ }
+
+ return done == 0 ? rc : done;
+}
+
+/* Common Event Queue Functions */
+
+static __poll_t iommufd_eventq_fops_poll(struct file *filep,
+ struct poll_table_struct *wait)
+{
+ struct iommufd_eventq *eventq = filep->private_data;
+ __poll_t pollflags = 0;
+
+ if (eventq->obj.type == IOMMUFD_OBJ_FAULT)
+ pollflags |= EPOLLOUT;
+
+ poll_wait(filep, &eventq->wait_queue, wait);
+ spin_lock(&eventq->lock);
+ if (!list_empty(&eventq->deliver))
+ pollflags |= EPOLLIN | EPOLLRDNORM;
+ spin_unlock(&eventq->lock);
+
+ return pollflags;
+}
+
+static int iommufd_eventq_fops_release(struct inode *inode, struct file *filep)
+{
+ struct iommufd_eventq *eventq = filep->private_data;
+
+ refcount_dec(&eventq->obj.users);
+ iommufd_ctx_put(eventq->ictx);
+ return 0;
+}
+
+#define INIT_EVENTQ_FOPS(read_op, write_op) \
+ ((const struct file_operations){ \
+ .owner = THIS_MODULE, \
+ .open = nonseekable_open, \
+ .read = read_op, \
+ .write = write_op, \
+ .poll = iommufd_eventq_fops_poll, \
+ .release = iommufd_eventq_fops_release, \
+ })
+
+static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name,
+ struct iommufd_ctx *ictx,
+ const struct file_operations *fops)
+{
+ struct file *filep;
+ int fdno;
+
+ spin_lock_init(&eventq->lock);
+ INIT_LIST_HEAD(&eventq->deliver);
+ init_waitqueue_head(&eventq->wait_queue);
+
+ filep = anon_inode_getfile(name, fops, eventq, O_RDWR);
+ if (IS_ERR(filep))
+ return PTR_ERR(filep);
+
+ eventq->ictx = ictx;
+ iommufd_ctx_get(eventq->ictx);
+ eventq->filep = filep;
+ refcount_inc(&eventq->obj.users);
+
+ fdno = get_unused_fd_flags(O_CLOEXEC);
+ if (fdno < 0)
+ fput(filep);
+ return fdno;
+}
+
+static const struct file_operations iommufd_fault_fops =
+ INIT_EVENTQ_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write);
+
+int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_fault_alloc *cmd = ucmd->cmd;
+ struct iommufd_fault *fault;
+ int fdno;
+ int rc;
+
+ if (cmd->flags)
+ return -EOPNOTSUPP;
+
+ fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT,
+ common.obj);
+ if (IS_ERR(fault))
+ return PTR_ERR(fault);
+
+ xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
+ mutex_init(&fault->mutex);
+
+ fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]",
+ ucmd->ictx, &iommufd_fault_fops);
+ if (fdno < 0) {
+ rc = fdno;
+ goto out_abort;
+ }
+
+ cmd->out_fault_id = fault->common.obj.id;
+ cmd->out_fault_fd = fdno;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_put_fdno;
+ iommufd_object_finalize(ucmd->ictx, &fault->common.obj);
+
+ fd_install(fdno, fault->common.filep);
+
+ return 0;
+out_put_fdno:
+ put_unused_fd(fdno);
+ fput(fault->common.filep);
+out_abort:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj);
+
+ return rc;
+}
+
+int iommufd_fault_iopf_handler(struct iopf_group *group)
+{
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_fault *fault;
+
+ hwpt = group->attach_handle->domain->iommufd_hwpt;
+ fault = hwpt->fault;
+
+ spin_lock(&fault->common.lock);
+ list_add_tail(&group->node, &fault->common.deliver);
+ spin_unlock(&fault->common.lock);
+
+ wake_up_interruptible(&fault->common.wait_queue);
+
+ return 0;
+}
+
+static const struct file_operations iommufd_veventq_fops =
+ INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL);
+
+int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_veventq_alloc *cmd = ucmd->cmd;
+ struct iommufd_veventq *veventq;
+ struct iommufd_viommu *viommu;
+ int fdno;
+ int rc;
+
+ if (cmd->flags || cmd->__reserved ||
+ cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT)
+ return -EOPNOTSUPP;
+ if (!cmd->veventq_depth)
+ return -EINVAL;
+
+ viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+ if (IS_ERR(viommu))
+ return PTR_ERR(viommu);
+
+ down_write(&viommu->veventqs_rwsem);
+
+ if (iommufd_viommu_find_veventq(viommu, cmd->type)) {
+ rc = -EEXIST;
+ goto out_unlock_veventqs;
+ }
+
+ veventq = __iommufd_object_alloc(ucmd->ictx, veventq,
+ IOMMUFD_OBJ_VEVENTQ, common.obj);
+ if (IS_ERR(veventq)) {
+ rc = PTR_ERR(veventq);
+ goto out_unlock_veventqs;
+ }
+
+ veventq->type = cmd->type;
+ veventq->viommu = viommu;
+ refcount_inc(&viommu->obj.users);
+ veventq->depth = cmd->veventq_depth;
+ list_add_tail(&veventq->node, &viommu->veventqs);
+ veventq->lost_events_header.header.flags =
+ IOMMU_VEVENTQ_FLAG_LOST_EVENTS;
+
+ fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]",
+ ucmd->ictx, &iommufd_veventq_fops);
+ if (fdno < 0) {
+ rc = fdno;
+ goto out_abort;
+ }
+
+ cmd->out_veventq_id = veventq->common.obj.id;
+ cmd->out_veventq_fd = fdno;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_put_fdno;
+
+ iommufd_object_finalize(ucmd->ictx, &veventq->common.obj);
+ fd_install(fdno, veventq->common.filep);
+ goto out_unlock_veventqs;
+
+out_put_fdno:
+ put_unused_fd(fdno);
+ fput(veventq->common.filep);
+out_abort:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj);
+out_unlock_veventqs:
+ up_write(&viommu->veventqs_rwsem);
+ iommufd_put_object(ucmd->ictx, &viommu->obj);
+ return rc;
+}
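The vEVENTQ fd is read-only from userspace: each record is a struct iommufd_vevent_header, normally followed by a type-specific payload, while a header with IOMMU_VEVENTQ_FLAG_LOST_EVENTS set stands alone and reports an overflow. A minimal polling sketch, assuming the caller knows the fixed payload size of the single event type the queue was allocated with; the function name and buffer sizing are illustrative:

#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/iommufd.h>

/* Wait for and consume one batch of events; @payload_len is per event type. */
static void example_drain_veventq(int veventq_fd, size_t payload_len)
{
	struct pollfd pfd = { .fd = veventq_fd, .events = POLLIN };
	char buf[4096];
	ssize_t len;
	size_t off = 0;

	if (poll(&pfd, 1, -1) <= 0)
		return;

	len = read(veventq_fd, buf, sizeof(buf));
	while (len > 0 && off + sizeof(struct iommufd_vevent_header) <= (size_t)len) {
		struct iommufd_vevent_header *hdr = (void *)(buf + off);

		off += sizeof(*hdr);
		if (hdr->flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) {
			/* no payload follows a lost-events header */
			fprintf(stderr, "vEVENTQ overflowed, events were lost\n");
			continue;
		}
		/* payload of the negotiated event type starts at buf + off */
		off += payload_len;
	}
}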
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
deleted file mode 100644
index d9a937450e55..000000000000
--- a/drivers/iommu/iommufd/fault.c
+++ /dev/null
@@ -1,462 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2024 Intel Corporation
- */
-#define pr_fmt(fmt) "iommufd: " fmt
-
-#include <linux/anon_inodes.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/iommufd.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/pci.h>
-#include <linux/pci-ats.h>
-#include <linux/poll.h>
-#include <uapi/linux/iommufd.h>
-
-#include "../iommu-priv.h"
-#include "iommufd_private.h"
-
-static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
-{
- struct device *dev = idev->dev;
- int ret;
-
- /*
- * Once we turn on PCI/PRI support for VF, the response failure code
- * should not be forwarded to the hardware due to PRI being a shared
- * resource between PF and VFs. There is no coordination for this
- * shared capability. This waits for a vPRI reset to recover.
- */
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- if (pdev->is_virtfn && pci_pri_supported(pdev))
- return -EINVAL;
- }
-
- mutex_lock(&idev->iopf_lock);
- /* Device iopf has already been on. */
- if (++idev->iopf_enabled > 1) {
- mutex_unlock(&idev->iopf_lock);
- return 0;
- }
-
- ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
- if (ret)
- --idev->iopf_enabled;
- mutex_unlock(&idev->iopf_lock);
-
- return ret;
-}
-
-static void iommufd_fault_iopf_disable(struct iommufd_device *idev)
-{
- mutex_lock(&idev->iopf_lock);
- if (!WARN_ON(idev->iopf_enabled == 0)) {
- if (--idev->iopf_enabled == 0)
- iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
- }
- mutex_unlock(&idev->iopf_lock);
-}
-
-static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- struct iommufd_attach_handle *handle;
- int ret;
-
- handle = kzalloc(sizeof(*handle), GFP_KERNEL);
- if (!handle)
- return -ENOMEM;
-
- handle->idev = idev;
- ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
- &handle->handle);
- if (ret)
- kfree(handle);
-
- return ret;
-}
-
-int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- int ret;
-
- if (!hwpt->fault)
- return -EINVAL;
-
- ret = iommufd_fault_iopf_enable(idev);
- if (ret)
- return ret;
-
- ret = __fault_domain_attach_dev(hwpt, idev);
- if (ret)
- iommufd_fault_iopf_disable(idev);
-
- return ret;
-}
-
-static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_attach_handle *handle)
-{
- struct iommufd_fault *fault = hwpt->fault;
- struct iopf_group *group, *next;
- struct list_head free_list;
- unsigned long index;
-
- if (!fault)
- return;
- INIT_LIST_HEAD(&free_list);
-
- mutex_lock(&fault->mutex);
- spin_lock(&fault->lock);
- list_for_each_entry_safe(group, next, &fault->deliver, node) {
- if (group->attach_handle != &handle->handle)
- continue;
- list_move(&group->node, &free_list);
- }
- spin_unlock(&fault->lock);
-
- list_for_each_entry_safe(group, next, &free_list, node) {
- list_del(&group->node);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
-
- xa_for_each(&fault->response, index, group) {
- if (group->attach_handle != &handle->handle)
- continue;
- xa_erase(&fault->response, index);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
- mutex_unlock(&fault->mutex);
-}
-
-static struct iommufd_attach_handle *
-iommufd_device_get_attach_handle(struct iommufd_device *idev)
-{
- struct iommu_attach_handle *handle;
-
- handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
- if (IS_ERR(handle))
- return NULL;
-
- return to_iommufd_handle(handle);
-}
-
-void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- struct iommufd_attach_handle *handle;
-
- handle = iommufd_device_get_attach_handle(idev);
- iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
- iommufd_auto_response_faults(hwpt, handle);
- iommufd_fault_iopf_disable(idev);
- kfree(handle);
-}
-
-static int __fault_domain_replace_dev(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- struct iommufd_hw_pagetable *old)
-{
- struct iommufd_attach_handle *handle, *curr = NULL;
- int ret;
-
- if (old->fault)
- curr = iommufd_device_get_attach_handle(idev);
-
- if (hwpt->fault) {
- handle = kzalloc(sizeof(*handle), GFP_KERNEL);
- if (!handle)
- return -ENOMEM;
-
- handle->idev = idev;
- ret = iommu_replace_group_handle(idev->igroup->group,
- hwpt->domain, &handle->handle);
- } else {
- ret = iommu_replace_group_handle(idev->igroup->group,
- hwpt->domain, NULL);
- }
-
- if (!ret && curr) {
- iommufd_auto_response_faults(old, curr);
- kfree(curr);
- }
-
- return ret;
-}
-
-int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- struct iommufd_hw_pagetable *old)
-{
- bool iopf_off = !hwpt->fault && old->fault;
- bool iopf_on = hwpt->fault && !old->fault;
- int ret;
-
- if (iopf_on) {
- ret = iommufd_fault_iopf_enable(idev);
- if (ret)
- return ret;
- }
-
- ret = __fault_domain_replace_dev(idev, hwpt, old);
- if (ret) {
- if (iopf_on)
- iommufd_fault_iopf_disable(idev);
- return ret;
- }
-
- if (iopf_off)
- iommufd_fault_iopf_disable(idev);
-
- return 0;
-}
-
-void iommufd_fault_destroy(struct iommufd_object *obj)
-{
- struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
- struct iopf_group *group, *next;
- unsigned long index;
-
- /*
- * The iommufd object's reference count is zero at this point.
- * We can be confident that no other threads are currently
- * accessing this pointer. Therefore, acquiring the mutex here
- * is unnecessary.
- */
- list_for_each_entry_safe(group, next, &fault->deliver, node) {
- list_del(&group->node);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
- xa_for_each(&fault->response, index, group) {
- xa_erase(&fault->response, index);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
- xa_destroy(&fault->response);
- mutex_destroy(&fault->mutex);
-}
-
-static void iommufd_compose_fault_message(struct iommu_fault *fault,
- struct iommu_hwpt_pgfault *hwpt_fault,
- struct iommufd_device *idev,
- u32 cookie)
-{
- hwpt_fault->flags = fault->prm.flags;
- hwpt_fault->dev_id = idev->obj.id;
- hwpt_fault->pasid = fault->prm.pasid;
- hwpt_fault->grpid = fault->prm.grpid;
- hwpt_fault->perm = fault->prm.perm;
- hwpt_fault->addr = fault->prm.addr;
- hwpt_fault->length = 0;
- hwpt_fault->cookie = cookie;
-}
-
-static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
- size_t count, loff_t *ppos)
-{
- size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
- struct iommufd_fault *fault = filep->private_data;
- struct iommu_hwpt_pgfault data = {};
- struct iommufd_device *idev;
- struct iopf_group *group;
- struct iopf_fault *iopf;
- size_t done = 0;
- int rc = 0;
-
- if (*ppos || count % fault_size)
- return -ESPIPE;
-
- mutex_lock(&fault->mutex);
- while ((group = iommufd_fault_deliver_fetch(fault))) {
- if (done >= count ||
- group->fault_count * fault_size > count - done) {
- iommufd_fault_deliver_restore(fault, group);
- break;
- }
-
- rc = xa_alloc(&fault->response, &group->cookie, group,
- xa_limit_32b, GFP_KERNEL);
- if (rc) {
- iommufd_fault_deliver_restore(fault, group);
- break;
- }
-
- idev = to_iommufd_handle(group->attach_handle)->idev;
- list_for_each_entry(iopf, &group->faults, list) {
- iommufd_compose_fault_message(&iopf->fault,
- &data, idev,
- group->cookie);
- if (copy_to_user(buf + done, &data, fault_size)) {
- xa_erase(&fault->response, group->cookie);
- iommufd_fault_deliver_restore(fault, group);
- rc = -EFAULT;
- break;
- }
- done += fault_size;
- }
- }
- mutex_unlock(&fault->mutex);
-
- return done == 0 ? rc : done;
-}
-
-static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- size_t response_size = sizeof(struct iommu_hwpt_page_response);
- struct iommufd_fault *fault = filep->private_data;
- struct iommu_hwpt_page_response response;
- struct iopf_group *group;
- size_t done = 0;
- int rc = 0;
-
- if (*ppos || count % response_size)
- return -ESPIPE;
-
- mutex_lock(&fault->mutex);
- while (count > done) {
- rc = copy_from_user(&response, buf + done, response_size);
- if (rc)
- break;
-
- static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
- (int)IOMMU_PAGE_RESP_SUCCESS);
- static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
- (int)IOMMU_PAGE_RESP_INVALID);
- if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
- response.code != IOMMUFD_PAGE_RESP_INVALID) {
- rc = -EINVAL;
- break;
- }
-
- group = xa_erase(&fault->response, response.cookie);
- if (!group) {
- rc = -EINVAL;
- break;
- }
-
- iopf_group_response(group, response.code);
- iopf_free_group(group);
- done += response_size;
- }
- mutex_unlock(&fault->mutex);
-
- return done == 0 ? rc : done;
-}
-
-static __poll_t iommufd_fault_fops_poll(struct file *filep,
- struct poll_table_struct *wait)
-{
- struct iommufd_fault *fault = filep->private_data;
- __poll_t pollflags = EPOLLOUT;
-
- poll_wait(filep, &fault->wait_queue, wait);
- spin_lock(&fault->lock);
- if (!list_empty(&fault->deliver))
- pollflags |= EPOLLIN | EPOLLRDNORM;
- spin_unlock(&fault->lock);
-
- return pollflags;
-}
-
-static int iommufd_fault_fops_release(struct inode *inode, struct file *filep)
-{
- struct iommufd_fault *fault = filep->private_data;
-
- refcount_dec(&fault->obj.users);
- iommufd_ctx_put(fault->ictx);
- return 0;
-}
-
-static const struct file_operations iommufd_fault_fops = {
- .owner = THIS_MODULE,
- .open = nonseekable_open,
- .read = iommufd_fault_fops_read,
- .write = iommufd_fault_fops_write,
- .poll = iommufd_fault_fops_poll,
- .release = iommufd_fault_fops_release,
-};
-
-int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
-{
- struct iommu_fault_alloc *cmd = ucmd->cmd;
- struct iommufd_fault *fault;
- struct file *filep;
- int fdno;
- int rc;
-
- if (cmd->flags)
- return -EOPNOTSUPP;
-
- fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT);
- if (IS_ERR(fault))
- return PTR_ERR(fault);
-
- fault->ictx = ucmd->ictx;
- INIT_LIST_HEAD(&fault->deliver);
- xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
- mutex_init(&fault->mutex);
- spin_lock_init(&fault->lock);
- init_waitqueue_head(&fault->wait_queue);
-
- filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops,
- fault, O_RDWR);
- if (IS_ERR(filep)) {
- rc = PTR_ERR(filep);
- goto out_abort;
- }
-
- refcount_inc(&fault->obj.users);
- iommufd_ctx_get(fault->ictx);
- fault->filep = filep;
-
- fdno = get_unused_fd_flags(O_CLOEXEC);
- if (fdno < 0) {
- rc = fdno;
- goto out_fput;
- }
-
- cmd->out_fault_id = fault->obj.id;
- cmd->out_fault_fd = fdno;
-
- rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
- if (rc)
- goto out_put_fdno;
- iommufd_object_finalize(ucmd->ictx, &fault->obj);
-
- fd_install(fdno, fault->filep);
-
- return 0;
-out_put_fdno:
- put_unused_fd(fdno);
-out_fput:
- fput(filep);
-out_abort:
- iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj);
-
- return rc;
-}
-
-int iommufd_fault_iopf_handler(struct iopf_group *group)
-{
- struct iommufd_hw_pagetable *hwpt;
- struct iommufd_fault *fault;
-
- hwpt = group->attach_handle->domain->fault_data;
- fault = hwpt->fault;
-
- spin_lock(&fault->lock);
- list_add_tail(&group->node, &fault->deliver);
- spin_unlock(&fault->lock);
-
- wake_up_interruptible(&fault->wait_queue);
-
- return 0;
-}
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 598be26a14e2..487779470261 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -14,7 +14,7 @@ static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
iommu_domain_free(hwpt->domain);
if (hwpt->fault)
- refcount_dec(&hwpt->fault->obj.users);
+ refcount_dec(&hwpt->fault->common.obj.users);
}
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
@@ -90,6 +90,7 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
* @ictx: iommufd context
* @ioas: IOAS to associate the domain with
* @idev: Device to get an iommu_domain for
+ * @pasid: PASID to get an iommu_domain for
* @flags: Flags from userspace
* @immediate_attach: True if idev should be attached to the hwpt
* @user_data: The user provided driver specific data describing the domain to
@@ -105,13 +106,14 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
*/
struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, u32 flags,
- bool immediate_attach,
+ struct iommufd_device *idev, ioasid_t pasid,
+ u32 flags, bool immediate_attach,
const struct iommu_user_data *user_data)
{
const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
- IOMMU_HWPT_FAULT_ID_VALID;
+ IOMMU_HWPT_FAULT_ID_VALID |
+ IOMMU_HWPT_ALLOC_PASID;
const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_hw_pagetable *hwpt;
@@ -126,12 +128,16 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
!device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
return ERR_PTR(-EOPNOTSUPP);
+ if ((flags & IOMMU_HWPT_FAULT_ID_VALID) &&
+ (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
+ return ERR_PTR(-EOPNOTSUPP);
hwpt_paging = __iommufd_object_alloc(
ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj);
if (IS_ERR(hwpt_paging))
return ERR_CAST(hwpt_paging);
hwpt = &hwpt_paging->common;
+ hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID;
INIT_LIST_HEAD(&hwpt_paging->hwpt_item);
/* Pairs with iommufd_hw_pagetable_destroy() */
@@ -156,6 +162,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
goto out_abort;
}
}
+ hwpt->domain->iommufd_hwpt = hwpt;
+ hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
/*
* Set the coherency mode before we do iopt_table_add_domain() as some
@@ -184,7 +192,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
* sequence. Once those drivers are fixed this should be removed.
*/
if (immediate_attach) {
- rc = iommufd_hw_pagetable_attach(hwpt, idev);
+ rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
if (rc)
goto out_abort;
}
@@ -197,7 +205,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
out_detach:
if (immediate_attach)
- iommufd_hw_pagetable_detach(idev);
+ iommufd_hw_pagetable_detach(idev, pasid);
out_abort:
iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
return ERR_PTR(rc);
@@ -226,7 +234,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
struct iommufd_hw_pagetable *hwpt;
int rc;
- if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) ||
+ if ((flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) ||
!user_data->len || !ops->domain_alloc_nested)
return ERR_PTR(-EOPNOTSUPP);
if (parent->auto_domain || !parent->nest_parent ||
@@ -238,6 +246,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
if (IS_ERR(hwpt_nested))
return ERR_CAST(hwpt_nested);
hwpt = &hwpt_nested->common;
+ hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID;
refcount_inc(&parent->common.obj.users);
hwpt_nested->parent = parent;
@@ -251,6 +260,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
goto out_abort;
}
hwpt->domain->owner = ops;
+ hwpt->domain->iommufd_hwpt = hwpt;
+ hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
rc = -EINVAL;
@@ -280,7 +291,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
struct iommufd_hw_pagetable *hwpt;
int rc;
- if (flags & ~IOMMU_HWPT_FAULT_ID_VALID)
+ if (flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID))
return ERR_PTR(-EOPNOTSUPP);
if (!user_data->len)
return ERR_PTR(-EOPNOTSUPP);
@@ -292,6 +303,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
if (IS_ERR(hwpt_nested))
return ERR_CAST(hwpt_nested);
hwpt = &hwpt_nested->common;
+ hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID;
hwpt_nested->viommu = viommu;
refcount_inc(&viommu->obj.users);
@@ -306,7 +318,9 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
hwpt->domain = NULL;
goto out_abort;
}
+ hwpt->domain->iommufd_hwpt = hwpt;
hwpt->domain->owner = viommu->iommu_dev->ops;
+ hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
rc = -EINVAL;
@@ -355,8 +369,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
ioas = container_of(pt_obj, struct iommufd_ioas, obj);
mutex_lock(&ioas->mutex);
hwpt_paging = iommufd_hwpt_paging_alloc(
- ucmd->ictx, ioas, idev, cmd->flags, false,
- user_data.len ? &user_data : NULL);
+ ucmd->ictx, ioas, idev, IOMMU_NO_PASID, cmd->flags,
+ false, user_data.len ? &user_data : NULL);
if (IS_ERR(hwpt_paging)) {
rc = PTR_ERR(hwpt_paging);
goto out_unlock;
@@ -406,9 +420,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
}
hwpt->fault = fault;
hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
- hwpt->domain->fault_data = hwpt;
- refcount_inc(&fault->obj.users);
- iommufd_put_object(ucmd->ictx, &fault->obj);
+ refcount_inc(&fault->common.obj.users);
+ iommufd_put_object(ucmd->ictx, &fault->common.obj);
}
cmd->out_hwpt_id = hwpt->obj.id;
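Userspace opts a hardware page table into PASID use at allocation time with IOMMU_HWPT_ALLOC_PASID; the same flag is now accepted by the nested and vIOMMU allocation paths above, while combining IOMMU_HWPT_FAULT_ID_VALID with IOMMU_HWPT_ALLOC_NEST_PARENT is rejected. A minimal sketch allocating a PASID-compatible paging HWPT from an IOAS, assuming the struct iommu_hwpt_alloc layout from the installed uAPI header and with error handling trimmed:

#include <sys/ioctl.h>
#include <linux/iommufd.h>

/* Allocate a PASID-compatible paging HWPT for @dev_id on top of @ioas_id. */
static int example_alloc_pasid_hwpt(int iommufd, __u32 dev_id, __u32 ioas_id,
				    __u32 *out_hwpt_id)
{
	struct iommu_hwpt_alloc cmd = {
		.size = sizeof(cmd),
		.flags = IOMMU_HWPT_ALLOC_PASID,
		.dev_id = dev_id,
		.pt_id = ioas_id,
	};

	if (ioctl(iommufd, IOMMU_HWPT_ALLOC, &cmd))
		return -1;
	*out_hwpt_id = cmd.out_hwpt_id;
	return 0;
}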
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 0b1bafc7fd99..80e8c76d25f2 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -19,6 +19,25 @@ struct iommu_group;
struct iommu_option;
struct iommufd_device;
+struct iommufd_sw_msi_map {
+ struct list_head sw_msi_item;
+ phys_addr_t sw_msi_start;
+ phys_addr_t msi_addr;
+ unsigned int pgoff;
+ unsigned int id;
+};
+
+/* Bitmap of struct iommufd_sw_msi_map::id */
+struct iommufd_sw_msi_maps {
+ DECLARE_BITMAP(bitmap, 64);
+};
+
+#ifdef CONFIG_IRQ_MSI_IOMMU
+int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
+ struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_sw_msi_map *msi_map);
+#endif
+
struct iommufd_ctx {
struct file *file;
struct xarray objects;
@@ -26,6 +45,10 @@ struct iommufd_ctx {
wait_queue_head_t destroy_wait;
struct rw_semaphore ioas_creation_lock;
+ struct mutex sw_msi_lock;
+ struct list_head sw_msi_list;
+ unsigned int sw_msi_id;
+
u8 account_mode;
/* Compatibility with VFIO no iommu */
u8 no_iommu_mode;
@@ -276,6 +299,7 @@ struct iommufd_hw_pagetable {
struct iommufd_object obj;
struct iommu_domain *domain;
struct iommufd_fault *fault;
+ bool pasid_compat : 1;
};
struct iommufd_hwpt_paging {
@@ -283,10 +307,10 @@ struct iommufd_hwpt_paging {
struct iommufd_ioas *ioas;
bool auto_domain : 1;
bool enforce_cache_coherency : 1;
- bool msi_cookie : 1;
bool nest_parent : 1;
/* Head at iommufd_ioas::hwpt_list */
struct list_head hwpt_item;
+ struct iommufd_sw_msi_maps present_sw_msi;
};
struct iommufd_hwpt_nested {
@@ -346,13 +370,13 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, u32 flags,
- bool immediate_attach,
+ struct iommufd_device *idev, ioasid_t pasid,
+ u32 flags, bool immediate_attach,
const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev);
+ struct iommufd_device *idev, ioasid_t pasid);
struct iommufd_hw_pagetable *
-iommufd_hw_pagetable_detach(struct iommufd_device *idev);
+iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
@@ -376,13 +400,15 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
refcount_dec(&hwpt->obj.users);
}
+struct iommufd_attach;
+
struct iommufd_group {
struct kref ref;
struct mutex lock;
struct iommufd_ctx *ictx;
struct iommu_group *group;
- struct iommufd_hw_pagetable *hwpt;
- struct list_head device_list;
+ struct xarray pasid_attach;
+ struct iommufd_sw_msi_maps required_sw_msi;
phys_addr_t sw_msi_start;
};
@@ -433,49 +459,17 @@ void iopt_remove_access(struct io_pagetable *iopt,
u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
-/*
- * An iommufd_fault object represents an interface to deliver I/O page faults
- * to the user space. These objects are created/destroyed by the user space and
- * associated with hardware page table objects during page-table allocation.
- */
-struct iommufd_fault {
+struct iommufd_eventq {
struct iommufd_object obj;
struct iommufd_ctx *ictx;
struct file *filep;
spinlock_t lock; /* protects the deliver list */
struct list_head deliver;
- struct mutex mutex; /* serializes response flows */
- struct xarray response;
struct wait_queue_head wait_queue;
};
-/* Fetch the first node out of the fault->deliver list */
-static inline struct iopf_group *
-iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
-{
- struct list_head *list = &fault->deliver;
- struct iopf_group *group = NULL;
-
- spin_lock(&fault->lock);
- if (!list_empty(list)) {
- group = list_first_entry(list, struct iopf_group, node);
- list_del(&group->node);
- }
- spin_unlock(&fault->lock);
- return group;
-}
-
-/* Restore a node back to the head of the fault->deliver list */
-static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
- struct iopf_group *group)
-{
- spin_lock(&fault->lock);
- list_add(&group->node, &fault->deliver);
- spin_unlock(&fault->lock);
-}
-
struct iommufd_attach_handle {
struct iommu_attach_handle handle;
struct iommufd_device *idev;
@@ -484,54 +478,106 @@ struct iommufd_attach_handle {
/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle)
+/*
+ * An iommufd_fault object represents an interface to deliver I/O page faults
+ * to the user space. These objects are created/destroyed by the user space and
+ * associated with hardware page table objects during page-table allocation.
+ */
+struct iommufd_fault {
+ struct iommufd_eventq common;
+ struct mutex mutex; /* serializes response flows */
+ struct xarray response;
+};
+
+static inline struct iommufd_fault *
+eventq_to_fault(struct iommufd_eventq *eventq)
+{
+ return container_of(eventq, struct iommufd_fault, common);
+}
+
static inline struct iommufd_fault *
iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
{
return container_of(iommufd_get_object(ucmd->ictx, id,
IOMMUFD_OBJ_FAULT),
- struct iommufd_fault, obj);
+ struct iommufd_fault, common.obj);
}
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);
-int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev);
-void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev);
-int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- struct iommufd_hw_pagetable *old);
+int iommufd_fault_iopf_enable(struct iommufd_device *idev);
+void iommufd_fault_iopf_disable(struct iommufd_device *idev);
+void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_attach_handle *handle);
+
+/* An iommufd_vevent represents a vIOMMU event in an iommufd_veventq */
+struct iommufd_vevent {
+ struct iommufd_vevent_header header;
+ struct list_head node; /* for iommufd_eventq::deliver */
+ ssize_t data_len;
+ u64 event_data[] __counted_by(data_len);
+};
+
+#define vevent_for_lost_events_header(vevent) \
+ (vevent->header.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)
-static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- if (hwpt->fault)
- return iommufd_fault_domain_attach_dev(hwpt, idev);
+/*
+ * An iommufd_veventq object represents an interface to deliver vIOMMU events to
+ * the user space. It is created/destroyed by the user space and associated with
+ * a vIOMMU object during allocation.
+ */
+struct iommufd_veventq {
+ struct iommufd_eventq common;
+ struct iommufd_viommu *viommu;
+ struct list_head node; /* for iommufd_viommu::veventqs */
+ struct iommufd_vevent lost_events_header;
- return iommu_attach_group(hwpt->domain, idev->igroup->group);
-}
+ unsigned int type;
+ unsigned int depth;
+
+ /* Use common.lock for protection */
+ u32 num_events;
+ u32 sequence;
+};
-static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
+static inline struct iommufd_veventq *
+eventq_to_veventq(struct iommufd_eventq *eventq)
{
- if (hwpt->fault) {
- iommufd_fault_domain_detach_dev(hwpt, idev);
- return;
- }
+ return container_of(eventq, struct iommufd_veventq, common);
+}
- iommu_detach_group(hwpt->domain, idev->igroup->group);
+static inline struct iommufd_veventq *
+iommufd_get_veventq(struct iommufd_ucmd *ucmd, u32 id)
+{
+ return container_of(iommufd_get_object(ucmd->ictx, id,
+ IOMMUFD_OBJ_VEVENTQ),
+ struct iommufd_veventq, common.obj);
}
-static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- struct iommufd_hw_pagetable *old)
+int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd);
+void iommufd_veventq_destroy(struct iommufd_object *obj);
+void iommufd_veventq_abort(struct iommufd_object *obj);
+
+static inline void iommufd_vevent_handler(struct iommufd_veventq *veventq,
+ struct iommufd_vevent *vevent)
{
- if (old->fault || hwpt->fault)
- return iommufd_fault_domain_replace_dev(idev, hwpt, old);
+ struct iommufd_eventq *eventq = &veventq->common;
+
+ lockdep_assert_held(&eventq->lock);
- return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
+ /*
+ * Remove the lost_events_header and add the new node at the same time.
+ * Note the new node can be lost_events_header, for a sequence update.
+ */
+ if (list_is_last(&veventq->lost_events_header.node, &eventq->deliver))
+ list_del(&veventq->lost_events_header.node);
+ list_add_tail(&vevent->node, &eventq->deliver);
+ vevent->header.sequence = veventq->sequence;
+ veventq->sequence = (veventq->sequence + 1) & INT_MAX;
+
+ wake_up_interruptible(&eventq->wait_queue);
}
static inline struct iommufd_viommu *
@@ -542,6 +588,20 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
struct iommufd_viommu, obj);
}
+static inline struct iommufd_veventq *
+iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type)
+{
+ struct iommufd_veventq *veventq, *next;
+
+ lockdep_assert_held(&viommu->veventqs_rwsem);
+
+ list_for_each_entry_safe(veventq, next, &viommu->veventqs, node) {
+ if (veventq->type == type)
+ return veventq;
+ }
+ return NULL;
+}
+
int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
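For orientation on the header changes above: the queueing fields shared by faults and vIOMMU events now live in struct iommufd_eventq, which struct iommufd_fault and struct iommufd_veventq embed and downcast from via container_of(), and each queued vevent receives a sequence number that wraps at INT_MAX. A self-contained userspace model of those two mechanics (struct names invented for the example, not the kernel definitions):

/* Illustrative model of the eventq embedding and sequence wrap; not kernel code. */
#include <limits.h>
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct eventq {				/* models struct iommufd_eventq */
	int id;
};

struct fault {				/* models struct iommufd_fault */
	struct eventq common;
	int num_responses;
};

static struct fault *eventq_to_fault(struct eventq *eventq)
{
	return container_of(eventq, struct fault, common);
}

int main(void)
{
	struct fault f = { .common = { .id = 1 }, .num_responses = 3 };
	struct eventq *eq = &f.common;
	unsigned int sequence = INT_MAX;

	/* Recover the containing object from the embedded common base. */
	printf("responses: %d\n", eventq_to_fault(eq)->num_responses);

	/* The veventq sequence counter wraps back to 0 after INT_MAX. */
	sequence = (sequence + 1) & INT_MAX;
	printf("next sequence: %u\n", sequence);
	return 0;
}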
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
index a6b7a163f636..1cd7e8394129 100644
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -24,6 +24,11 @@ enum {
IOMMU_TEST_OP_MD_CHECK_IOTLB,
IOMMU_TEST_OP_TRIGGER_IOPF,
IOMMU_TEST_OP_DEV_CHECK_CACHE,
+ IOMMU_TEST_OP_TRIGGER_VEVENT,
+ IOMMU_TEST_OP_PASID_ATTACH,
+ IOMMU_TEST_OP_PASID_REPLACE,
+ IOMMU_TEST_OP_PASID_DETACH,
+ IOMMU_TEST_OP_PASID_CHECK_HWPT,
};
enum {
@@ -48,6 +53,7 @@ enum {
enum {
MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0,
MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1,
+ MOCK_FLAGS_DEVICE_PASID = 1 << 2,
};
enum {
@@ -60,6 +66,9 @@ enum {
MOCK_DEV_CACHE_NUM = 4,
};
+/* Reserved for special pasid replace test */
+#define IOMMU_TEST_PASID_RESERVED 1024
+
struct iommu_test_cmd {
__u32 size;
__u32 op;
@@ -145,11 +154,36 @@ struct iommu_test_cmd {
__u32 id;
__u32 cache;
} check_dev_cache;
+ struct {
+ __u32 dev_id;
+ } trigger_vevent;
+ struct {
+ __u32 pasid;
+ __u32 pt_id;
+ /* @id is stdev_id */
+ } pasid_attach;
+ struct {
+ __u32 pasid;
+ __u32 pt_id;
+ /* @id is stdev_id */
+ } pasid_replace;
+ struct {
+ __u32 pasid;
+ /* @id is stdev_id */
+ } pasid_detach;
+ struct {
+ __u32 pasid;
+ __u32 hwpt_id;
+ /* @id is stdev_id */
+ } pasid_check;
};
__u32 last;
};
#define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32)
+/* Mock device/iommu PASID width */
+#define MOCK_PASID_WIDTH 20
+
/* Mock structs for IOMMU_DEVICE_GET_HW_INFO ioctl */
#define IOMMU_HW_INFO_TYPE_SELFTEST 0xfeedbeef
#define IOMMU_HW_INFO_SELFTEST_REGVAL 0xdeadbeef
@@ -212,4 +246,10 @@ struct iommu_viommu_invalidate_selftest {
__u32 cache_id;
};
+#define IOMMU_VEVENTQ_TYPE_SELFTEST 0xbeefbeef
+
+struct iommu_viommu_event_selftest {
+ __u32 virt_id;
+};
+
#endif
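The new PASID test ops carry the target PASID and page table ID in the union members above, while the command's top-level id holds the stdev_id. A rough sketch of how a selftest might fill the attach op; the fd/stdev_id/pt_id plumbing and test_cmd_submit() are hypothetical stand-ins for the existing harness helpers, not real interfaces:

/* Sketch only: test_cmd_submit() is a hypothetical submit helper. */
#include <linux/types.h>
#include <string.h>
#include "iommufd_test.h"	/* as wired up by the selftest harness */

static int test_pasid_attach(int fd, __u32 stdev_id, __u32 pasid, __u32 pt_id)
{
	struct iommu_test_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.size = sizeof(cmd);
	cmd.op = IOMMU_TEST_OP_PASID_ATTACH;
	cmd.id = stdev_id;			/* @id is stdev_id */
	cmd.pasid_attach.pasid = pasid;
	cmd.pasid_attach.pt_id = pt_id;

	return test_cmd_submit(fd, &cmd);	/* hypothetical */
}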
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index ccf616462a1c..3df468f64e7d 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -227,6 +227,8 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
xa_init(&ictx->groups);
ictx->file = filp;
init_waitqueue_head(&ictx->destroy_wait);
+ mutex_init(&ictx->sw_msi_lock);
+ INIT_LIST_HEAD(&ictx->sw_msi_list);
filp->private_data = ictx;
return 0;
}
@@ -234,6 +236,8 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
static int iommufd_fops_release(struct inode *inode, struct file *filp)
{
struct iommufd_ctx *ictx = filp->private_data;
+ struct iommufd_sw_msi_map *next;
+ struct iommufd_sw_msi_map *cur;
struct iommufd_object *obj;
/*
@@ -262,6 +266,11 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
break;
}
WARN_ON(!xa_empty(&ictx->groups));
+
+ mutex_destroy(&ictx->sw_msi_lock);
+ list_for_each_entry_safe(cur, next, &ictx->sw_msi_list, sw_msi_item)
+ kfree(cur);
+
kfree(ictx);
return 0;
}
@@ -308,6 +317,7 @@ union ucmd_buffer {
struct iommu_ioas_unmap unmap;
struct iommu_option option;
struct iommu_vdevice_alloc vdev;
+ struct iommu_veventq_alloc veventq;
struct iommu_vfio_ioas vfio_ioas;
struct iommu_viommu_alloc viommu;
#ifdef CONFIG_IOMMUFD_TEST
@@ -363,6 +373,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl,
struct iommu_vdevice_alloc, virt_id),
+ IOCTL_OP(IOMMU_VEVENTQ_ALLOC, iommufd_veventq_alloc,
+ struct iommu_veventq_alloc, out_veventq_fd),
IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
__reserved),
IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
@@ -505,6 +517,10 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
[IOMMUFD_OBJ_VDEVICE] = {
.destroy = iommufd_vdevice_destroy,
},
+ [IOMMUFD_OBJ_VEVENTQ] = {
+ .destroy = iommufd_veventq_destroy,
+ .abort = iommufd_veventq_abort,
+ },
[IOMMUFD_OBJ_VIOMMU] = {
.destroy = iommufd_viommu_destroy,
},
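In the ioctl table above, out_veventq_fd is named as the last field of struct iommu_veventq_alloc that user space must provide; shorter layouts that still cover it remain acceptable. A self-contained model of that minimum-size-up-to-a-named-member convention, with invented field names rather than the real uapi layout:

/* Model of the "accept anything at least up to <last member>" check. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define offsetofend(type, member) \
	(offsetof(type, member) + sizeof(((type *)0)->member))

struct example_alloc {
	uint32_t size;		/* user space reports its struct size here */
	uint32_t flags;
	uint32_t out_fd;	/* last field the kernel must receive */
	uint32_t future_field;	/* may be absent in older user space */
};

static int check_user_size(uint32_t user_size)
{
	if (user_size < offsetofend(struct example_alloc, out_fd))
		return -22;	/* -EINVAL: too short to contain out_fd */
	return 0;
}

int main(void)
{
	printf("8 bytes  -> %d\n", check_user_size(8));		/* rejected */
	printf("12 bytes -> %d\n", check_user_size(12));	/* accepted */
	return 0;
}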
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index d40deb0a4f06..18d9a216eb30 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -161,9 +161,13 @@ enum selftest_obj_type {
struct mock_dev {
struct device dev;
+ struct mock_viommu *viommu;
+ struct rw_semaphore viommu_rwsem;
unsigned long flags;
+ unsigned long vdev_id;
int id;
u32 cache[MOCK_DEV_CACHE_NUM];
+ atomic_t pasid_1024_fake_error;
};
static inline struct mock_dev *to_mock_dev(struct device *dev)
@@ -193,15 +197,71 @@ static int mock_domain_nop_attach(struct iommu_domain *domain,
struct device *dev)
{
struct mock_dev *mdev = to_mock_dev(dev);
+ struct mock_viommu *new_viommu = NULL;
+ unsigned long vdev_id = 0;
+ int rc;
if (domain->dirty_ops && (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY))
return -EINVAL;
+ iommu_group_mutex_assert(dev);
+ if (domain->type == IOMMU_DOMAIN_NESTED) {
+ new_viommu = to_mock_nested(domain)->mock_viommu;
+ if (new_viommu) {
+ rc = iommufd_viommu_get_vdev_id(&new_viommu->core, dev,
+ &vdev_id);
+ if (rc)
+ return rc;
+ }
+ }
+ if (new_viommu != mdev->viommu) {
+ down_write(&mdev->viommu_rwsem);
+ mdev->viommu = new_viommu;
+ mdev->vdev_id = vdev_id;
+ up_write(&mdev->viommu_rwsem);
+ }
+
+ return 0;
+}
+
+static int mock_domain_set_dev_pasid_nop(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ struct mock_dev *mdev = to_mock_dev(dev);
+
+ /*
+ * On the first attach with pasid 1024, set mdev->pasid_1024_fake_error.
+ * The second call of this op then fakes an error to exercise the error
+ * path of the core. This helps test the case where the iommu core has
+ * to roll back to the old domain because of a driver failure, e.g. on
+ * replace. Note that a third call of this op succeeds again, since
+ * mdev->pasid_1024_fake_error is cleared by the second call.
+ */
+ if (pasid == 1024) {
+ if (domain->type == IOMMU_DOMAIN_BLOCKED) {
+ atomic_set(&mdev->pasid_1024_fake_error, 0);
+ } else if (atomic_read(&mdev->pasid_1024_fake_error)) {
+ /*
+ * Clear the flag, and fake an error to fail the
+ * replacement.
+ */
+ atomic_set(&mdev->pasid_1024_fake_error, 0);
+ return -ENOMEM;
+ } else {
+ /* Set the flag to fake an error in next call */
+ atomic_set(&mdev->pasid_1024_fake_error, 1);
+ }
+ }
+
return 0;
}
static const struct iommu_domain_ops mock_blocking_ops = {
.attach_dev = mock_domain_nop_attach,
+ .set_dev_pasid = mock_domain_set_dev_pasid_nop
};
static struct iommu_domain mock_blocking_domain = {
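The set_dev_pasid fake-error logic above amounts to a three-call sequence on PASID 1024: the first attach arms the error, the next replace fails exactly once, and a further call succeeds again (re-arming). A small standalone model of that behaviour, not the kernel code:

/* Standalone model of the pasid-1024 branch of mock_domain_set_dev_pasid_nop(). */
#include <errno.h>
#include <stdio.h>

static int fake_error_armed;

static int set_dev_pasid(int pasid, int domain_blocked)
{
	if (pasid != 1024)
		return 0;
	if (domain_blocked) {		/* detach clears any armed error */
		fake_error_armed = 0;
		return 0;
	}
	if (fake_error_armed) {		/* fail exactly once, then disarm */
		fake_error_armed = 0;
		return -ENOMEM;
	}
	fake_error_armed = 1;		/* arm the error for the next call */
	return 0;
}

int main(void)
{
	printf("attach : %d\n", set_dev_pasid(1024, 0));	/* 0, arms    */
	printf("replace: %d\n", set_dev_pasid(1024, 0));	/* -ENOMEM    */
	printf("replace: %d\n", set_dev_pasid(1024, 0));	/* 0, re-arms */
	return 0;
}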
@@ -343,7 +403,7 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
struct mock_iommu_domain_nested *mock_nested;
struct mock_iommu_domain *mock_parent;
- if (flags)
+ if (flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
if (!parent || parent->ops != mock_ops.default_domain_ops)
return ERR_PTR(-EINVAL);
@@ -365,7 +425,8 @@ mock_domain_alloc_paging_flags(struct device *dev, u32 flags,
{
bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
- IOMMU_HWPT_ALLOC_NEST_PARENT;
+ IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_ALLOC_PASID;
struct mock_dev *mdev = to_mock_dev(dev);
bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY;
struct mock_iommu_domain *mock;
@@ -585,7 +646,7 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
struct mock_viommu *mock_viommu = to_mock_viommu(viommu);
struct mock_iommu_domain_nested *mock_nested;
- if (flags)
+ if (flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
mock_nested = __mock_domain_alloc_nested(user_data);
@@ -720,6 +781,7 @@ static const struct iommu_ops mock_ops = {
.map_pages = mock_domain_map_pages,
.unmap_pages = mock_domain_unmap_pages,
.iova_to_phys = mock_domain_iova_to_phys,
+ .set_dev_pasid = mock_domain_set_dev_pasid_nop,
},
};
@@ -780,6 +842,7 @@ static struct iommu_domain_ops domain_nested_ops = {
.free = mock_domain_free_nested,
.attach_dev = mock_domain_nop_attach,
.cache_invalidate_user = mock_domain_cache_invalidate_user,
+ .set_dev_pasid = mock_domain_set_dev_pasid_nop,
};
static inline struct iommufd_hw_pagetable *
@@ -839,17 +902,24 @@ static void mock_dev_release(struct device *dev)
static struct mock_dev *mock_dev_create(unsigned long dev_flags)
{
+ struct property_entry prop[] = {
+ PROPERTY_ENTRY_U32("pasid-num-bits", 0),
+ {},
+ };
+ const u32 valid_flags = MOCK_FLAGS_DEVICE_NO_DIRTY |
+ MOCK_FLAGS_DEVICE_HUGE_IOVA |
+ MOCK_FLAGS_DEVICE_PASID;
struct mock_dev *mdev;
int rc, i;
- if (dev_flags &
- ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA))
+ if (dev_flags & ~valid_flags)
return ERR_PTR(-EINVAL);
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev)
return ERR_PTR(-ENOMEM);
+ init_rwsem(&mdev->viommu_rwsem);
device_initialize(&mdev->dev);
mdev->flags = dev_flags;
mdev->dev.release = mock_dev_release;
@@ -866,6 +936,15 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
if (rc)
goto err_put;
+ if (dev_flags & MOCK_FLAGS_DEVICE_PASID)
+ prop[0] = PROPERTY_ENTRY_U32("pasid-num-bits", MOCK_PASID_WIDTH);
+
+ rc = device_create_managed_software_node(&mdev->dev, prop, NULL);
+ if (rc) {
+ dev_err(&mdev->dev, "add pasid-num-bits property failed, rc: %d", rc);
+ goto err_put;
+ }
+
rc = device_add(&mdev->dev);
if (rc)
goto err_put;
@@ -921,7 +1000,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
}
sobj->idev.idev = idev;
- rc = iommufd_device_attach(idev, &pt_id);
+ rc = iommufd_device_attach(idev, IOMMU_NO_PASID, &pt_id);
if (rc)
goto out_unbind;
@@ -936,7 +1015,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
return 0;
out_detach:
- iommufd_device_detach(idev);
+ iommufd_device_detach(idev, IOMMU_NO_PASID);
out_unbind:
iommufd_device_unbind(idev);
out_mdev:
@@ -946,39 +1025,49 @@ out_sobj:
return rc;
}
-/* Replace the mock domain with a manually allocated hw_pagetable */
-static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
- unsigned int device_id, u32 pt_id,
- struct iommu_test_cmd *cmd)
+static struct selftest_obj *
+iommufd_test_get_selftest_obj(struct iommufd_ctx *ictx, u32 id)
{
struct iommufd_object *dev_obj;
struct selftest_obj *sobj;
- int rc;
/*
* Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure
* it doesn't race with detach, which is not allowed.
*/
- dev_obj =
- iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST);
+ dev_obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_SELFTEST);
if (IS_ERR(dev_obj))
- return PTR_ERR(dev_obj);
+ return ERR_CAST(dev_obj);
sobj = to_selftest_obj(dev_obj);
if (sobj->type != TYPE_IDEV) {
- rc = -EINVAL;
- goto out_dev_obj;
+ iommufd_put_object(ictx, dev_obj);
+ return ERR_PTR(-EINVAL);
}
+ return sobj;
+}
- rc = iommufd_device_replace(sobj->idev.idev, &pt_id);
+/* Replace the mock domain with a manually allocated hw_pagetable */
+static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
+ unsigned int device_id, u32 pt_id,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+ int rc;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, device_id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ rc = iommufd_device_replace(sobj->idev.idev, IOMMU_NO_PASID, &pt_id);
if (rc)
- goto out_dev_obj;
+ goto out_sobj;
cmd->mock_domain_replace.pt_id = pt_id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
-out_dev_obj:
- iommufd_put_object(ucmd->ictx, dev_obj);
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
return rc;
}
@@ -1597,13 +1686,166 @@ static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd,
return 0;
}
+static int iommufd_test_trigger_vevent(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct iommu_viommu_event_selftest test = {};
+ struct iommufd_device *idev;
+ struct mock_dev *mdev;
+ int rc = -ENOENT;
+
+ idev = iommufd_get_device(ucmd, cmd->trigger_vevent.dev_id);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+ mdev = to_mock_dev(idev->dev);
+
+ down_read(&mdev->viommu_rwsem);
+ if (!mdev->viommu || !mdev->vdev_id)
+ goto out_unlock;
+
+ test.virt_id = mdev->vdev_id;
+ rc = iommufd_viommu_report_event(&mdev->viommu->core,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, &test,
+ sizeof(test));
+out_unlock:
+ up_read(&mdev->viommu_rwsem);
+ iommufd_put_object(ucmd->ictx, &idev->obj);
+
+ return rc;
+}
+
+static inline struct iommufd_hw_pagetable *
+iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
+{
+ struct iommufd_object *pt_obj;
+
+ pt_obj = iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_ANY);
+ if (IS_ERR(pt_obj))
+ return ERR_CAST(pt_obj);
+
+ if (pt_obj->type != IOMMUFD_OBJ_HWPT_NESTED &&
+ pt_obj->type != IOMMUFD_OBJ_HWPT_PAGING) {
+ iommufd_put_object(ucmd->ictx, pt_obj);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return container_of(pt_obj, struct iommufd_hw_pagetable, obj);
+}
+
+static int iommufd_test_pasid_check_hwpt(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ u32 hwpt_id = cmd->pasid_check.hwpt_id;
+ struct iommu_domain *attached_domain;
+ struct iommu_attach_handle *handle;
+ struct iommufd_hw_pagetable *hwpt;
+ struct selftest_obj *sobj;
+ struct mock_dev *mdev;
+ int rc = 0;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ mdev = sobj->idev.mock_dev;
+
+ handle = iommu_attach_handle_get(mdev->dev.iommu_group,
+ cmd->pasid_check.pasid, 0);
+ if (IS_ERR(handle))
+ attached_domain = NULL;
+ else
+ attached_domain = handle->domain;
+
+ /* hwpt_id == 0 means to check if pasid is detached */
+ if (!hwpt_id) {
+ if (attached_domain)
+ rc = -EINVAL;
+ goto out_sobj;
+ }
+
+ hwpt = iommufd_get_hwpt(ucmd, hwpt_id);
+ if (IS_ERR(hwpt)) {
+ rc = PTR_ERR(hwpt);
+ goto out_sobj;
+ }
+
+ if (attached_domain != hwpt->domain)
+ rc = -EINVAL;
+
+ iommufd_put_object(ucmd->ictx, &hwpt->obj);
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return rc;
+}
+
+static int iommufd_test_pasid_attach(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+ int rc;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ rc = iommufd_device_attach(sobj->idev.idev, cmd->pasid_attach.pasid,
+ &cmd->pasid_attach.pt_id);
+ if (rc)
+ goto out_sobj;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ iommufd_device_detach(sobj->idev.idev,
+ cmd->pasid_attach.pasid);
+
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return rc;
+}
+
+static int iommufd_test_pasid_replace(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+ int rc;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ rc = iommufd_device_replace(sobj->idev.idev, cmd->pasid_attach.pasid,
+ &cmd->pasid_attach.pt_id);
+ if (rc)
+ goto out_sobj;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return rc;
+}
+
+static int iommufd_test_pasid_detach(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ iommufd_device_detach(sobj->idev.idev, cmd->pasid_detach.pasid);
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return 0;
+}
+
void iommufd_selftest_destroy(struct iommufd_object *obj)
{
struct selftest_obj *sobj = to_selftest_obj(obj);
switch (sobj->type) {
case TYPE_IDEV:
- iommufd_device_detach(sobj->idev.idev);
+ iommufd_device_detach(sobj->idev.idev, IOMMU_NO_PASID);
iommufd_device_unbind(sobj->idev.idev);
mock_dev_destroy(sobj->idev.mock_dev);
break;
@@ -1678,6 +1920,16 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
cmd->dirty.flags);
case IOMMU_TEST_OP_TRIGGER_IOPF:
return iommufd_test_trigger_iopf(ucmd, cmd);
+ case IOMMU_TEST_OP_TRIGGER_VEVENT:
+ return iommufd_test_trigger_vevent(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_ATTACH:
+ return iommufd_test_pasid_attach(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_REPLACE:
+ return iommufd_test_pasid_replace(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_DETACH:
+ return iommufd_test_pasid_detach(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_CHECK_HWPT:
+ return iommufd_test_pasid_check_hwpt(ucmd, cmd);
default:
return -EOPNOTSUPP;
}
@@ -1724,6 +1976,7 @@ int __init iommufd_test_init(void)
init_completion(&mock_iommu.complete);
mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq");
+ mock_iommu.iommu_dev.max_pasids = (1 << MOCK_PASID_WIDTH);
return 0;
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 69b88e8c7c26..01df2b985f02 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -59,6 +59,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
viommu->ictx = ucmd->ictx;
viommu->hwpt = hwpt_paging;
refcount_inc(&viommu->hwpt->common.obj.users);
+ INIT_LIST_HEAD(&viommu->veventqs);
+ init_rwsem(&viommu->veventqs_rwsem);
/*
* It is the most likely case that a physical IOMMU is unpluggable. A
* pluggable IOMMU instance (if exists) is responsible for refcounting
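With viommu->veventqs and veventqs_rwsem initialised here, iommufd_viommu_report_event() (used by the selftest trigger path earlier in this patch) can look up the queue registered for an event type and deliver into it. A condensed, hypothetical driver-side sketch; struct my_dev and struct my_drv_event are invented, and the selftest event type only stands in for a driver-defined one:

/* Hypothetical driver-side sketch; my_dev/my_drv_event are invented and
 * IOMMU_VEVENTQ_TYPE_SELFTEST only stands in for a real per-driver type. */
struct my_drv_event {
	__u32 virt_id;
};

static int my_drv_report_vevent(struct my_dev *mdev, u32 virt_id)
{
	struct my_drv_event ev = { .virt_id = virt_id };

	if (!mdev->viommu)	/* no vIOMMU bound to this device */
		return -ENOENT;

	/* Looks up the veventq of this type under veventqs_rwsem and queues ev. */
	return iommufd_viommu_report_event(mdev->viommu,
					   IOMMU_VEVENTQ_TYPE_SELFTEST,
					   &ev, sizeof(ev));
}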
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 074daf1aac4e..e424b279a8cd 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1081,31 +1081,24 @@ static int ipmmu_probe(struct platform_device *pdev)
}
}
+ platform_set_drvdata(pdev, mmu);
/*
* Register the IPMMU to the IOMMU subsystem in the following cases:
* - R-Car Gen2 IPMMU (all devices registered)
* - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device)
*/
- if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) {
- ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL,
- dev_name(&pdev->dev));
- if (ret)
- return ret;
-
- ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev);
- if (ret)
- return ret;
- }
+ if (mmu->features->has_cache_leaf_nodes && ipmmu_is_root(mmu))
+ return 0;
- /*
- * We can't create the ARM mapping here as it requires the bus to have
- * an IOMMU, which only happens when bus_set_iommu() is called in
- * ipmmu_init() after the probe function returns.
- */
+ ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev));
+ if (ret)
+ return ret;
- platform_set_drvdata(pdev, mmu);
+ ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev);
+ if (ret)
+ iommu_device_sysfs_remove(&mmu->iommu);
- return 0;
+ return ret;
}
static void ipmmu_remove(struct platform_device *pdev)
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 034b0e670384..df98d0c65f54 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, data);
mutex_init(&data->mutex);
- ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
- "mtk-iommu.%pa", &ioaddr);
- if (ret)
- goto out_link_remove;
-
- ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
- if (ret)
- goto out_sysfs_remove;
-
if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
list_add_tail(&data->list, data->plat_data->hw_list);
data->hw_list = data->plat_data->hw_list;
@@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
data->hw_list = &data->hw_list_head;
}
+ ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
+ "mtk-iommu.%pa", &ioaddr);
+ if (ret)
+ goto out_list_del;
+
+ ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
+ if (ret)
+ goto out_sysfs_remove;
+
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
if (ret)
- goto out_list_del;
+ goto out_device_unregister;
}
return ret;
-out_list_del:
- list_del(&data->list);
+out_device_unregister:
iommu_device_unregister(&data->iommu);
out_sysfs_remove:
iommu_device_sysfs_remove(&data->iommu);
-out_link_remove:
+out_list_del:
+ list_del(&data->list);
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
device_link_remove(data->smicomm_dev, dev);
out_runtime_disable:
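The reordering above also re-sorts the error labels so teardown runs in reverse order of setup, with the hw-list removal now the last thing undone. A generic, self-contained illustration of that unwind shape (step names invented for the example):

/* Generic goto-unwind illustration; prints the do/undo ordering. */
#include <stdio.h>

static int step(const char *name) { printf("do   %s\n", name); return 0; }
static void undo(const char *name) { printf("undo %s\n", name); }

static int probe(void)
{
	int ret;

	ret = step("add to hw list");
	if (ret)
		return ret;
	ret = step("sysfs add");
	if (ret)
		goto out_list_del;
	ret = step("iommu register");
	if (ret)
		goto out_sysfs_remove;
	ret = step("component master add");
	if (ret)
		goto out_unregister;
	return 0;

out_unregister:
	undo("iommu register");
out_sysfs_remove:
	undo("sysfs add");
out_list_del:
	undo("add to hw list");
	return ret;
}

int main(void) { return probe(); }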
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index a565b9e40f4a..66824982e05f 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -27,11 +27,20 @@
#include <linux/spinlock.h>
#include <linux/string_choices.h>
#include <asm/barrier.h>
-#include <asm/dma-iommu.h>
#include <dt-bindings/memory/mtk-memory-port.h>
#include <dt-bindings/memory/mt2701-larb-port.h>
#include <soc/mediatek/smi.h>
+#if defined(CONFIG_ARM)
+#include <asm/dma-iommu.h>
+#else
+#define arm_iommu_create_mapping(...) NULL
+#define arm_iommu_attach_device(...) -ENODEV
+struct dma_iommu_mapping {
+ struct iommu_domain *domain;
+};
+#endif
+
#define REG_MMU_PT_BASE_ADDR 0x000
#define F_ALL_INVLD 0x2
@@ -446,22 +455,13 @@ static int mtk_iommu_v1_create_mapping(struct device *dev,
static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct iommu_fwspec *fwspec = NULL;
struct of_phandle_args iommu_spec;
struct mtk_iommu_v1_data *data;
int err, idx = 0, larbid, larbidx;
struct device_link *link;
struct device *larbdev;
- /*
- * In the deferred case, free the existed fwspec.
- * Always initialize the fwspec internally.
- */
- if (fwspec) {
- iommu_fwspec_free(dev);
- fwspec = dev_iommu_fwspec_get(dev);
- }
-
while (!of_parse_phandle_with_args(dev->of_node, "iommus",
"#iommu-cells",
idx, &iommu_spec)) {
@@ -476,6 +476,9 @@ static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev)
idx++;
}
+ if (!fwspec)
+ return ERR_PTR(-ENODEV);
+
data = dev_iommu_priv_get(dev);
/* Link the consumer device with the smi-larb device(supplier) */
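The #else block above is the usual compile-test stub pattern: when the ARM-only mapping API is not built, shape-compatible stand-ins keep the rest of the file compiling. A generic, self-contained illustration with invented names:

/* Generic compile-test stub illustration; names are local to the example. */
#include <errno.h>
#include <stdio.h>

#if defined(HAVE_REAL_BACKEND)
#include "real_backend.h"		/* the platform-only API, when available */
#else
#define backend_create_mapping(...)	NULL
#define backend_attach_device(...)	(-ENODEV)
struct backend_mapping {
	void *domain;
};
#endif

int main(void)
{
	struct backend_mapping *m = backend_create_mapping(NULL, 0, 0);

	if (!m || backend_attach_device(NULL, m))
		printf("backend unavailable; compile-test stubs in use\n");
	return 0;
}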
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 97987cd78da9..6b989a62def2 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -116,6 +116,7 @@ static void of_pci_check_device_ats(struct device *dev, struct device_node *np)
int of_iommu_configure(struct device *dev, struct device_node *master_np,
const u32 *id)
{
+ bool dev_iommu_present;
int err;
if (!master_np)
@@ -127,6 +128,7 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np,
mutex_unlock(&iommu_probe_device_lock);
return 0;
}
+ dev_iommu_present = dev->iommu;
/*
* We don't currently walk up the tree looking for a parent IOMMU.
@@ -147,11 +149,18 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np,
err = of_iommu_configure_device(master_np, dev, id);
}
- if (err)
+ if (err && dev_iommu_present)
iommu_fwspec_free(dev);
+ else if (err && dev->iommu)
+ dev_iommu_free(dev);
mutex_unlock(&iommu_probe_device_lock);
- if (!err && dev->bus)
+ /*
+ * If we're not on the iommu_probe_device() path (as indicated by the
+ * initial dev->iommu) then try to simulate it. This should no longer
+ * happen unless of_dma_configure() is being misused outside bus code.
+ */
+ if (!err && dev->bus && !dev_iommu_present)
err = iommu_probe_device(dev);
if (err && err != -EPROBE_DEFER)
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 323cc665c357..af4cc91b2bbf 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -88,6 +88,7 @@ struct rk_iommu_domain {
dma_addr_t dt_dma;
spinlock_t iommus_lock; /* lock for iommus list */
spinlock_t dt_lock; /* lock for modifying page directory table */
+ struct device *dma_dev;
struct iommu_domain domain;
};
@@ -123,7 +124,6 @@ struct rk_iommudata {
struct rk_iommu *iommu;
};
-static struct device *dma_dev;
static const struct rk_iommu_ops *rk_ops;
static struct iommu_domain rk_identity_domain;
@@ -132,7 +132,7 @@ static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
{
size_t size = count * sizeof(u32); /* count of u32 entry */
- dma_sync_single_for_device(dma_dev, dma, size, DMA_TO_DEVICE);
+ dma_sync_single_for_device(dom->dma_dev, dma, size, DMA_TO_DEVICE);
}
static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
@@ -734,9 +734,9 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
if (!page_table)
return ERR_PTR(-ENOMEM);
- pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
- if (dma_mapping_error(dma_dev, pt_dma)) {
- dev_err(dma_dev, "DMA mapping error while allocating page table\n");
+ pt_dma = dma_map_single(rk_domain->dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(rk_domain->dma_dev, pt_dma)) {
+ dev_err(rk_domain->dma_dev, "DMA mapping error while allocating page table\n");
iommu_free_page(page_table);
return ERR_PTR(-ENOMEM);
}
@@ -1051,9 +1051,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
{
struct rk_iommu_domain *rk_domain;
-
- if (!dma_dev)
- return NULL;
+ struct rk_iommu *iommu;
rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL);
if (!rk_domain)
@@ -1068,10 +1066,12 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
if (!rk_domain->dt)
goto err_free_domain;
- rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt,
+ iommu = rk_iommu_from_dev(dev);
+ rk_domain->dma_dev = iommu->dev;
+ rk_domain->dt_dma = dma_map_single(rk_domain->dma_dev, rk_domain->dt,
SPAGE_SIZE, DMA_TO_DEVICE);
- if (dma_mapping_error(dma_dev, rk_domain->dt_dma)) {
- dev_err(dma_dev, "DMA map error for DT\n");
+ if (dma_mapping_error(rk_domain->dma_dev, rk_domain->dt_dma)) {
+ dev_err(rk_domain->dma_dev, "DMA map error for DT\n");
goto err_free_dt;
}
@@ -1105,13 +1105,13 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
if (rk_dte_is_pt_valid(dte)) {
phys_addr_t pt_phys = rk_ops->pt_address(dte);
u32 *page_table = phys_to_virt(pt_phys);
- dma_unmap_single(dma_dev, pt_phys,
+ dma_unmap_single(rk_domain->dma_dev, pt_phys,
SPAGE_SIZE, DMA_TO_DEVICE);
iommu_free_page(page_table);
}
}
- dma_unmap_single(dma_dev, rk_domain->dt_dma,
+ dma_unmap_single(rk_domain->dma_dev, rk_domain->dt_dma,
SPAGE_SIZE, DMA_TO_DEVICE);
iommu_free_page(rk_domain->dt);
@@ -1148,12 +1148,12 @@ static int rk_iommu_of_xlate(struct device *dev,
struct platform_device *iommu_dev;
struct rk_iommudata *data;
- data = devm_kzalloc(dma_dev, sizeof(*data), GFP_KERNEL);
+ iommu_dev = of_find_device_by_node(args->np);
+
+ data = devm_kzalloc(&iommu_dev->dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
- iommu_dev = of_find_device_by_node(args->np);
-
data->iommu = platform_get_drvdata(iommu_dev);
data->iommu->domain = &rk_identity_domain;
dev_iommu_priv_set(dev, data);
@@ -1256,22 +1256,6 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (err)
return err;
- err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
- if (err)
- goto err_unprepare_clocks;
-
- err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev);
- if (err)
- goto err_remove_sysfs;
-
- /*
- * Use the first registered IOMMU device for domain to use with DMA
- * API, since a domain might not physically correspond to a single
- * IOMMU device..
- */
- if (!dma_dev)
- dma_dev = &pdev->dev;
-
pm_runtime_enable(dev);
for (i = 0; i < iommu->num_irq; i++) {
@@ -1290,12 +1274,19 @@ static int rk_iommu_probe(struct platform_device *pdev)
dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask);
+ err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+ if (err)
+ goto err_pm_disable;
+
+ err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev);
+ if (err)
+ goto err_remove_sysfs;
+
return 0;
-err_pm_disable:
- pm_runtime_disable(dev);
err_remove_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
-err_unprepare_clocks:
+err_pm_disable:
+ pm_runtime_disable(dev);
clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
return err;
}
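With dma_dev now stored per domain, every table the CPU writes is mapped and synced against the owning IOMMU's device rather than a single global one. A condensed sketch of that pattern as the change preserves it; this is not the driver code and error handling is trimmed:

/* Condensed sketch: hand a CPU-written table to the IOMMU via the
 * per-domain dma_dev; simplified, not the rockchip driver code. */
static int rk_table_to_device(struct rk_iommu_domain *rk_domain,
			      u32 *table, size_t size, dma_addr_t *dma)
{
	*dma = dma_map_single(rk_domain->dma_dev, table, size, DMA_TO_DEVICE);
	if (dma_mapping_error(rk_domain->dma_dev, *dma))
		return -ENOMEM;

	/* After any later CPU update, push the entries out to the device. */
	dma_sync_single_for_device(rk_domain->dma_dev, *dma, size,
				   DMA_TO_DEVICE);
	return 0;
}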
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index fbdeded3d48b..e1c76e0f9c2b 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -16,7 +16,7 @@
#include "dma-iommu.h"
-static const struct iommu_ops s390_iommu_ops;
+static const struct iommu_ops s390_iommu_ops, s390_iommu_rtr_ops;
static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
@@ -381,6 +381,46 @@ static void zdev_s390_domain_update(struct zpci_dev *zdev,
spin_unlock_irqrestore(&zdev->dom_lock, flags);
}
+static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
+ struct iommu_domain *domain, u8 *status)
+{
+ struct s390_domain *s390_domain;
+ int rc = 0;
+ u64 iota;
+
+ switch (domain->type) {
+ case IOMMU_DOMAIN_IDENTITY:
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
+ zdev->end_dma, 0, status);
+ break;
+ case IOMMU_DOMAIN_BLOCKED:
+ /* Nothing to do in this case */
+ break;
+ default:
+ s390_domain = to_s390_domain(domain);
+ iota = virt_to_phys(s390_domain->dma_table) |
+ ZPCI_IOTA_RTTO_FLAG;
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
+ zdev->end_dma, iota, status);
+ }
+
+ return rc;
+}
+
+int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status)
+{
+ unsigned long flags;
+ int rc;
+
+ spin_lock_irqsave(&zdev->dom_lock, flags);
+
+ rc = s390_iommu_domain_reg_ioat(zdev, zdev->s390_domain, status);
+
+ spin_unlock_irqrestore(&zdev->dom_lock, flags);
+
+ return rc;
+}
+
static int blocking_domain_attach_device(struct iommu_domain *domain,
struct device *dev)
{
@@ -392,9 +432,11 @@ static int blocking_domain_attach_device(struct iommu_domain *domain,
return 0;
s390_domain = to_s390_domain(zdev->s390_domain);
- spin_lock_irqsave(&s390_domain->list_lock, flags);
- list_del_rcu(&zdev->iommu_list);
- spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+ if (zdev->dma_table) {
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
+ list_del_rcu(&zdev->iommu_list);
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+ }
zpci_unregister_ioat(zdev, 0);
zdev->dma_table = NULL;
@@ -422,8 +464,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
blocking_domain_attach_device(&blocking_domain, dev);
/* If we fail now DMA remains blocked via blocking domain */
- cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(s390_domain->dma_table), &status);
+ cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
return -EIO;
zdev->dma_table = s390_domain->dma_table;
@@ -723,7 +764,13 @@ int zpci_init_iommu(struct zpci_dev *zdev)
if (rc)
goto out_err;
- rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL);
+ if (zdev->rtr_avail) {
+ rc = iommu_device_register(&zdev->iommu_dev,
+ &s390_iommu_rtr_ops, NULL);
+ } else {
+ rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops,
+ NULL);
+ }
if (rc)
goto out_sysfs;
@@ -787,6 +834,39 @@ static int __init s390_iommu_init(void)
}
subsys_initcall(s390_iommu_init);
+static int s390_attach_dev_identity(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+ u8 status;
+ int cc;
+
+ blocking_domain_attach_device(&blocking_domain, dev);
+
+ /* If we fail now DMA remains blocked via blocking domain */
+ cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
+
+ /*
+ * If the device is undergoing error recovery the reset code
+ * will re-establish the new domain.
+ */
+ if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
+ return -EIO;
+
+ zdev_s390_domain_update(zdev, domain);
+
+ return 0;
+}
+
+static const struct iommu_domain_ops s390_identity_ops = {
+ .attach_dev = s390_attach_dev_identity,
+};
+
+static struct iommu_domain s390_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &s390_identity_ops,
+};
+
static struct iommu_domain blocking_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
@@ -794,23 +874,31 @@ static struct iommu_domain blocking_domain = {
}
};
-static const struct iommu_ops s390_iommu_ops = {
- .blocked_domain = &blocking_domain,
- .release_domain = &blocking_domain,
- .capable = s390_iommu_capable,
- .domain_alloc_paging = s390_domain_alloc_paging,
- .probe_device = s390_iommu_probe_device,
- .device_group = generic_device_group,
- .pgsize_bitmap = SZ_4K,
- .get_resv_regions = s390_iommu_get_resv_regions,
- .default_domain_ops = &(const struct iommu_domain_ops) {
- .attach_dev = s390_iommu_attach_device,
- .map_pages = s390_iommu_map_pages,
- .unmap_pages = s390_iommu_unmap_pages,
- .flush_iotlb_all = s390_iommu_flush_iotlb_all,
- .iotlb_sync = s390_iommu_iotlb_sync,
- .iotlb_sync_map = s390_iommu_iotlb_sync_map,
- .iova_to_phys = s390_iommu_iova_to_phys,
- .free = s390_domain_free,
+#define S390_IOMMU_COMMON_OPS() \
+ .blocked_domain = &blocking_domain, \
+ .release_domain = &blocking_domain, \
+ .capable = s390_iommu_capable, \
+ .domain_alloc_paging = s390_domain_alloc_paging, \
+ .probe_device = s390_iommu_probe_device, \
+ .device_group = generic_device_group, \
+ .pgsize_bitmap = SZ_4K, \
+ .get_resv_regions = s390_iommu_get_resv_regions, \
+ .default_domain_ops = &(const struct iommu_domain_ops) { \
+ .attach_dev = s390_iommu_attach_device, \
+ .map_pages = s390_iommu_map_pages, \
+ .unmap_pages = s390_iommu_unmap_pages, \
+ .flush_iotlb_all = s390_iommu_flush_iotlb_all, \
+ .iotlb_sync = s390_iommu_iotlb_sync, \
+ .iotlb_sync_map = s390_iommu_iotlb_sync_map, \
+ .iova_to_phys = s390_iommu_iova_to_phys, \
+ .free = s390_domain_free, \
}
+
+static const struct iommu_ops s390_iommu_ops = {
+ S390_IOMMU_COMMON_OPS()
+};
+
+static const struct iommu_ops s390_iommu_rtr_ops = {
+ .identity_domain = &s390_identity_domain,
+ S390_IOMMU_COMMON_OPS()
};
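The S390_IOMMU_COMMON_OPS() macro keeps one copy of the shared initializers while letting the rtr variant add .identity_domain on top. A self-contained illustration of that shared designated-initializer pattern, with invented types and fields:

/* Shared-initializer macro illustration; not the s390 code. */
#include <stdio.h>

struct ops {
	const char *name;
	int (*probe)(void);
	int has_identity;
};

static int common_probe(void) { return 0; }

#define COMMON_OPS() \
	.name = "common", \
	.probe = common_probe

static const struct ops plain_ops = {
	COMMON_OPS()
};

static const struct ops rtr_ops = {
	.has_identity = 1,
	COMMON_OPS()
};

int main(void)
{
	printf("%s identity=%d, %s identity=%d\n",
	       plain_ops.name, plain_ops.has_identity,
	       rtr_ops.name, rtr_ops.has_identity);
	return 0;
}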
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 7f633bb5efef..69d353e1df84 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -846,7 +846,6 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
err = ops->of_xlate(dev, args);
if (err < 0) {
dev_err(dev, "failed to parse SW group ID: %d\n", err);
- iommu_fwspec_free(dev);
return err;
}