author     Alex Williamson <alex.williamson@redhat.com>	2022-05-13 10:04:22 -0600
committer  Alex Williamson <alex.williamson@redhat.com>	2022-05-13 10:04:22 -0600
commit     c5e8c39282def775b707ccb5dfe644bd081aa7a8
tree       7496889d295df196e0560d777a36dfe36c54c884
parent     ff806cbd90bd2cc3d08a026e26157e5b5a6b5e03
parent     0286300e60455534b23f4b86ce79247829ceddb8
Merge remote-tracking branch 'iommu/vfio-notifier-fix' into v5.19/vfio/next
Merge IOMMU dependencies for vfio.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
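
The functional heart of this merge is the new driver_managed_dma opt-out: a driver that manages the IOMMU itself (VFIO being the motivating case) sets the flag in its driver structure, and the bus ->dma_configure() path then skips the iommu_device_use_default_domain() call it would otherwise make at bind time. A minimal sketch of an opted-out PCI driver follows; the "demo" names and device ID are hypothetical and not part of this commit:

#include <linux/module.h>
#include <linux/pci.h>

/* Hypothetical device ID, for illustration only. */
static const struct pci_device_id demo_ids[] = {
	{ PCI_DEVICE(0x1234, 0x5678) },
	{ }
};

static int demo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	/* The driver would attach the device to its own IOMMU domain here. */
	return 0;
}

static void demo_remove(struct pci_dev *pdev)
{
}

static struct pci_driver demo_driver = {
	.name     = "demo-user-dma",
	.id_table = demo_ids,
	.probe    = demo_probe,
	.remove   = demo_remove,
	/*
	 * Opt out of kernel DMA API ownership: pci_dma_configure() skips
	 * iommu_device_use_default_domain() for this driver, and
	 * pci_dma_cleanup() skips the matching unuse call, so user-space
	 * ownership of the group no longer blocks the bind.
	 */
	.driver_managed_dma = true,
};
module_pci_driver(demo_driver);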
 drivers/amba/bus.c                    |  37
 drivers/base/dd.c                     |   5
 drivers/base/platform.c               |  21
 drivers/bus/fsl-mc/fsl-mc-bus.c       |  24
 drivers/iommu/iommu.c                 | 331
 drivers/pci/pci-driver.c              |  18
 drivers/pci/pci-stub.c                |   1
 drivers/pci/pcie/portdrv_pci.c        |   2
 drivers/vfio/fsl-mc/vfio_fsl_mc.c     |   1
 drivers/vfio/pci/vfio_pci.c           |   1
 drivers/vfio/platform/vfio_amba.c     |   1
 drivers/vfio/platform/vfio_platform.c |   1
 drivers/vfio/vfio.c                   | 245
 include/linux/amba/bus.h              |   8
 include/linux/device/bus.h            |   3
 include/linux/fsl/mc.h                |   8
 include/linux/iommu.h                 |  54
 include/linux/pci.h                   |   8
 include/linux/platform_device.h       |  10
19 files changed, 417 insertions(+), 362 deletions(-)
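
For orientation before the diff: the group ownership API added to iommu.c below replaces vfio's notifier-based viability tracking. A container claims exclusive DMA ownership of the group when it attaches and releases it when it detaches, roughly as in this sketch (illustrative only; the my_* names are not from this commit, and vfio passes the container file as the owner cookie):

#include <linux/iommu.h>

static int my_attach(struct iommu_group *grp, void *owner_cookie)
{
	int ret;

	/*
	 * Fails with -EPERM if the group is already claimed, or -EBUSY if
	 * a kernel driver still has it attached to a non-default domain;
	 * on success the group is moved to the blocking domain.
	 */
	ret = iommu_group_claim_dma_owner(grp, owner_cookie);
	if (ret)
		return ret;

	/* ... set up the user I/O address space ... */
	return 0;
}

static void my_detach(struct iommu_group *grp)
{
	/* Drops ownership and re-attaches the group's default domain. */
	iommu_group_release_dma_owner(grp);
}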
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index d3bd14aaabf6..a0ec61232b6c 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -20,6 +20,10 @@
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/of_irq.h>
+#include <linux/of_device.h>
+#include <linux/acpi.h>
+#include <linux/iommu.h>
+#include <linux/dma-map-ops.h>
 
 #define to_amba_driver(d)	container_of(d, struct amba_driver, drv)
 
@@ -273,6 +277,36 @@ static void amba_shutdown(struct device *dev)
 	drv->shutdown(to_amba_device(dev));
 }
 
+static int amba_dma_configure(struct device *dev)
+{
+	struct amba_driver *drv = to_amba_driver(dev->driver);
+	enum dev_dma_attr attr;
+	int ret = 0;
+
+	if (dev->of_node) {
+		ret = of_dma_configure(dev, dev->of_node, true);
+	} else if (has_acpi_companion(dev)) {
+		attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode));
+		ret = acpi_dma_configure(dev, attr);
+	}
+
+	if (!ret && !drv->driver_managed_dma) {
+		ret = iommu_device_use_default_domain(dev);
+		if (ret)
+			arch_teardown_dma_ops(dev);
+	}
+
+	return ret;
+}
+
+static void amba_dma_cleanup(struct device *dev)
+{
+	struct amba_driver *drv = to_amba_driver(dev->driver);
+
+	if (!drv->driver_managed_dma)
+		iommu_device_unuse_default_domain(dev);
+}
+
 #ifdef CONFIG_PM
 /*
  * Hooks to provide runtime PM of the pclk (bus clock).  It is safe to
@@ -341,7 +375,8 @@ struct bus_type amba_bustype = {
 	.probe		= amba_probe,
 	.remove		= amba_remove,
 	.shutdown	= amba_shutdown,
-	.dma_configure	= platform_dma_configure,
+	.dma_configure	= amba_dma_configure,
+	.dma_cleanup	= amba_dma_cleanup,
 	.pm		= &amba_pm,
 };
 EXPORT_SYMBOL_GPL(amba_bustype);
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 3fc3b5940bb3..94b7ac9bf459 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -671,6 +671,8 @@ sysfs_failed:
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 					     BUS_NOTIFY_DRIVER_NOT_BOUND, dev);
+	if (dev->bus && dev->bus->dma_cleanup)
+		dev->bus->dma_cleanup(dev);
 pinctrl_bind_failed:
 	device_links_no_driver(dev);
 	device_unbind_cleanup(dev);
@@ -1199,6 +1201,9 @@ static void __device_release_driver(struct device *dev, struct device *parent)
 
 		device_remove(dev);
 
+		if (dev->bus && dev->bus->dma_cleanup)
+			dev->bus->dma_cleanup(dev);
+
 		device_links_driver_cleanup(dev);
 		device_unbind_cleanup(dev);
 
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 8cc272fd5c99..70bc30cf575c 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -30,6 +30,8 @@
 #include <linux/property.h>
 #include <linux/kmemleak.h>
 #include <linux/types.h>
+#include <linux/iommu.h>
+#include <linux/dma-map-ops.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -1454,9 +1456,9 @@ static void platform_shutdown(struct device *_dev)
 		drv->shutdown(dev);
 }
 
-
-int platform_dma_configure(struct device *dev)
+static int platform_dma_configure(struct device *dev)
 {
+	struct platform_driver *drv = to_platform_driver(dev->driver);
 	enum dev_dma_attr attr;
 	int ret = 0;
 
@@ -1467,9 +1469,23 @@ int platform_dma_configure(struct device *dev)
 		ret = acpi_dma_configure(dev, attr);
 	}
 
+	if (!ret && !drv->driver_managed_dma) {
+		ret = iommu_device_use_default_domain(dev);
+		if (ret)
+			arch_teardown_dma_ops(dev);
+	}
+
 	return ret;
 }
 
+static void platform_dma_cleanup(struct device *dev)
+{
+	struct platform_driver *drv = to_platform_driver(dev->driver);
+
+	if (!drv->driver_managed_dma)
+		iommu_device_unuse_default_domain(dev);
+}
+
 static const struct dev_pm_ops platform_dev_pm_ops = {
 	SET_RUNTIME_PM_OPS(pm_generic_runtime_suspend,
			   pm_generic_runtime_resume, NULL)
	USE_PLATFORM_PM_SLEEP_OPS
@@ -1484,6 +1500,7 @@ struct bus_type platform_bus_type = {
 	.remove		= platform_remove,
 	.shutdown	= platform_shutdown,
 	.dma_configure	= platform_dma_configure,
+	.dma_cleanup	= platform_dma_cleanup,
 	.pm		= &platform_dev_pm_ops,
 };
 EXPORT_SYMBOL_GPL(platform_bus_type);
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 8fd4a356a86e..76648c4fdaf4 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/acpi.h>
 #include <linux/iommu.h>
+#include <linux/dma-map-ops.h>
 
 #include "fsl-mc-private.h"
 
@@ -140,15 +141,33 @@ static int fsl_mc_dma_configure(struct device *dev)
 {
 	struct device *dma_dev = dev;
 	struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
+	struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver);
 	u32 input_id = mc_dev->icid;
+	int ret;
 
 	while (dev_is_fsl_mc(dma_dev))
 		dma_dev = dma_dev->parent;
 
 	if (dev_of_node(dma_dev))
-		return of_dma_configure_id(dev, dma_dev->of_node, 0, &input_id);
+		ret = of_dma_configure_id(dev, dma_dev->of_node, 0, &input_id);
+	else
+		ret = acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id);
+
+	if (!ret && !mc_drv->driver_managed_dma) {
+		ret = iommu_device_use_default_domain(dev);
+		if (ret)
+			arch_teardown_dma_ops(dev);
+	}
+
+	return ret;
+}
+
+static void fsl_mc_dma_cleanup(struct device *dev)
+{
+	struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver);
 
-	return acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id);
+	if (!mc_drv->driver_managed_dma)
+		iommu_device_unuse_default_domain(dev);
 }
 
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
@@ -312,6 +331,7 @@ struct bus_type fsl_mc_bus_type = {
 	.match = fsl_mc_bus_match,
 	.uevent = fsl_mc_bus_uevent,
 	.dma_configure  = fsl_mc_dma_configure,
+	.dma_cleanup = fsl_mc_dma_cleanup,
 	.dev_groups = fsl_mc_dev_groups,
 	.bus_groups = fsl_mc_bus_groups,
 };
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 857d4c2fd1a2..d54d7ce8cb45 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -18,7 +18,6 @@
 #include <linux/errno.h>
 #include <linux/iommu.h>
 #include <linux/idr.h>
-#include <linux/notifier.h>
 #include <linux/err.h>
 #include <linux/pci.h>
 #include <linux/bitops.h>
@@ -40,14 +39,16 @@ struct iommu_group {
 	struct kobject *devices_kobj;
 	struct list_head devices;
 	struct mutex mutex;
-	struct blocking_notifier_head notifier;
 	void *iommu_data;
 	void (*iommu_data_release)(void *iommu_data);
 	char *name;
 	int id;
 	struct iommu_domain *default_domain;
+	struct iommu_domain *blocking_domain;
 	struct iommu_domain *domain;
 	struct list_head entry;
+	unsigned int owner_cnt;
+	void *owner;
 };
 
 struct group_device {
@@ -82,8 +83,8 @@ static int __iommu_attach_device(struct iommu_domain *domain,
 				 struct device *dev);
 static int __iommu_attach_group(struct iommu_domain *domain,
 				struct iommu_group *group);
-static void __iommu_detach_group(struct iommu_domain *domain,
-				 struct iommu_group *group);
+static int __iommu_group_set_domain(struct iommu_group *group,
+				    struct iommu_domain *new_domain);
 static int iommu_create_device_direct_mappings(struct iommu_group *group,
 					       struct device *dev);
 static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
@@ -294,7 +295,11 @@ int iommu_probe_device(struct device *dev)
 	mutex_lock(&group->mutex);
 	iommu_alloc_default_domain(group, dev);
 
-	if (group->default_domain) {
+	/*
+	 * If device joined an existing group which has been claimed, don't
+	 * attach the default domain.
+	 */
+	if (group->default_domain && !group->owner) {
 		ret = __iommu_attach_device(group->default_domain, dev);
 		if (ret) {
 			mutex_unlock(&group->mutex);
@@ -599,6 +604,8 @@ static void iommu_group_release(struct kobject *kobj)
 
 	if (group->default_domain)
 		iommu_domain_free(group->default_domain);
+	if (group->blocking_domain)
+		iommu_domain_free(group->blocking_domain);
 
 	kfree(group->name);
 	kfree(group);
@@ -633,7 +640,6 @@ struct iommu_group *iommu_group_alloc(void)
 	mutex_init(&group->mutex);
 	INIT_LIST_HEAD(&group->devices);
 	INIT_LIST_HEAD(&group->entry);
-	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
 
 	ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL);
 	if (ret < 0) {
@@ -906,10 +912,6 @@ rename:
 	if (ret)
 		goto err_put_group;
 
-	/* Notify any listeners about change to group. */
-	blocking_notifier_call_chain(&group->notifier,
-				     IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev);
-
 	trace_add_device_to_group(group->id, dev);
 
 	dev_info(dev, "Adding to iommu group %d\n", group->id);
@@ -951,10 +953,6 @@ void iommu_group_remove_device(struct device *dev)
 
 	dev_info(dev, "Removing from iommu group %d\n", group->id);
 
-	/* Pre-notify listeners that a device is being removed. */
-	blocking_notifier_call_chain(&group->notifier,
-				     IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev);
-
 	mutex_lock(&group->mutex);
 	list_for_each_entry(tmp_device, &group->devices, list) {
 		if (tmp_device->dev == dev) {
@@ -1077,36 +1075,6 @@ void iommu_group_put(struct iommu_group *group)
 EXPORT_SYMBOL_GPL(iommu_group_put);
 
 /**
- * iommu_group_register_notifier - Register a notifier for group changes
- * @group: the group to watch
- * @nb: notifier block to signal
- *
- * This function allows iommu group users to track changes in a group.
- * See include/linux/iommu.h for actions sent via this notifier.  Caller
- * should hold a reference to the group throughout notifier registration.
- */
-int iommu_group_register_notifier(struct iommu_group *group,
-				  struct notifier_block *nb)
-{
-	return blocking_notifier_chain_register(&group->notifier, nb);
-}
-EXPORT_SYMBOL_GPL(iommu_group_register_notifier);
-
-/**
- * iommu_group_unregister_notifier - Unregister a notifier
- * @group: the group to watch
- * @nb: notifier block to signal
- *
- * Unregister a previously registered group notifier block.
- */
-int iommu_group_unregister_notifier(struct iommu_group *group,
-				    struct notifier_block *nb)
-{
-	return blocking_notifier_chain_unregister(&group->notifier, nb);
-}
-EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
-
-/**
  * iommu_register_device_fault_handler() - Register a device fault handler
  * @dev: the device
  * @handler: the fault handler
@@ -1651,14 +1619,8 @@ static int remove_iommu_group(struct device *dev, void *data)
 static int iommu_bus_notifier(struct notifier_block *nb,
 			      unsigned long action, void *data)
 {
-	unsigned long group_action = 0;
 	struct device *dev = data;
-	struct iommu_group *group;
 
-	/*
-	 * ADD/DEL call into iommu driver ops if provided, which may
-	 * result in ADD/DEL notifiers to group->notifier
-	 */
 	if (action == BUS_NOTIFY_ADD_DEVICE) {
 		int ret;
 
@@ -1669,34 +1631,6 @@ static int iommu_bus_notifier(struct notifier_block *nb,
 		return NOTIFY_OK;
 	}
 
-	/*
-	 * Remaining BUS_NOTIFYs get filtered and republished to the
-	 * group, if anyone is listening
-	 */
-	group = iommu_group_get(dev);
-	if (!group)
-		return 0;
-
-	switch (action) {
-	case BUS_NOTIFY_BIND_DRIVER:
-		group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER;
-		break;
-	case BUS_NOTIFY_BOUND_DRIVER:
-		group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER;
-		break;
-	case BUS_NOTIFY_UNBIND_DRIVER:
-		group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER;
-		break;
-	case BUS_NOTIFY_UNBOUND_DRIVER:
-		group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER;
-		break;
-	}
-
-	if (group_action)
-		blocking_notifier_call_chain(&group->notifier,
-					     group_action, dev);
-
-	iommu_group_put(group);
 	return 0;
 }
 
@@ -1983,6 +1917,24 @@ void iommu_domain_free(struct iommu_domain *domain)
 }
 EXPORT_SYMBOL_GPL(iommu_domain_free);
 
+/*
+ * Put the group's domain back to the appropriate core-owned domain - either the
+ * standard kernel-mode DMA configuration or an all-DMA-blocked domain.
+ */
+static void __iommu_group_set_core_domain(struct iommu_group *group)
+{
+	struct iommu_domain *new_domain;
+	int ret;
+
+	if (group->owner)
+		new_domain = group->blocking_domain;
+	else
+		new_domain = group->default_domain;
+
+	ret = __iommu_group_set_domain(group, new_domain);
+	WARN(ret, "iommu driver failed to attach the default/blocking domain");
+}
+
 static int __iommu_attach_device(struct iommu_domain *domain,
 				 struct device *dev)
 {
@@ -2039,9 +1991,6 @@ static void __iommu_detach_device(struct iommu_domain *domain,
 	if (iommu_is_attach_deferred(dev))
 		return;
 
-	if (unlikely(domain->ops->detach_dev == NULL))
-		return;
-
 	domain->ops->detach_dev(domain, dev);
 	trace_detach_device_from_domain(dev);
 }
@@ -2055,12 +2004,10 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
 		return;
 
 	mutex_lock(&group->mutex);
-	if (iommu_group_device_count(group) != 1) {
-		WARN_ON(1);
+	if (WARN_ON(domain != group->domain) ||
+	    WARN_ON(iommu_group_device_count(group) != 1))
 		goto out_unlock;
-	}
-
-	__iommu_detach_group(domain, group);
+	__iommu_group_set_core_domain(group);
 
 out_unlock:
 	mutex_unlock(&group->mutex);
@@ -2116,7 +2063,8 @@ static int __iommu_attach_group(struct iommu_domain *domain,
 {
 	int ret;
 
-	if (group->default_domain && group->domain != group->default_domain)
+	if (group->domain && group->domain != group->default_domain &&
+	    group->domain != group->blocking_domain)
 		return -EBUSY;
 
 	ret = __iommu_group_for_each_dev(group, domain,
@@ -2148,34 +2096,49 @@ static int iommu_group_do_detach_device(struct device *dev, void *data)
 	return 0;
 }
 
-static void __iommu_detach_group(struct iommu_domain *domain,
-				 struct iommu_group *group)
+static int __iommu_group_set_domain(struct iommu_group *group,
+				    struct iommu_domain *new_domain)
 {
 	int ret;
 
-	if (!group->default_domain) {
-		__iommu_group_for_each_dev(group, domain,
+	if (group->domain == new_domain)
+		return 0;
+
+	/*
+	 * New drivers should support default domains and so the detach_dev() op
+	 * will never be called. Otherwise the NULL domain represents some
+	 * platform specific behavior.
+	 */
+	if (!new_domain) {
+		if (WARN_ON(!group->domain->ops->detach_dev))
+			return -EINVAL;
+		__iommu_group_for_each_dev(group, group->domain,
 					   iommu_group_do_detach_device);
 		group->domain = NULL;
-		return;
+		return 0;
 	}
 
-	if (group->domain == group->default_domain)
-		return;
-
-	/* Detach by re-attaching to the default domain */
-	ret = __iommu_group_for_each_dev(group, group->default_domain,
+	/*
+	 * Changing the domain is done by calling attach_dev() on the new
+	 * domain. This switch does not have to be atomic and DMA can be
+	 * discarded during the transition. DMA must only be able to access
+	 * either new_domain or group->domain, never something else.
+	 *
+	 * Note that this is called in error unwind paths, attaching to a
+	 * domain that has already been attached cannot fail.
+	 */
+	ret = __iommu_group_for_each_dev(group, new_domain,
 					 iommu_group_do_attach_device);
-	if (ret != 0)
-		WARN_ON(1);
-	else
-		group->domain = group->default_domain;
+	if (ret)
+		return ret;
+	group->domain = new_domain;
+	return 0;
 }
 
 void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
 {
 	mutex_lock(&group->mutex);
-	__iommu_detach_group(domain, group);
+	__iommu_group_set_core_domain(group);
 	mutex_unlock(&group->mutex);
 }
 EXPORT_SYMBOL_GPL(iommu_detach_group);
@@ -3102,3 +3065,167 @@ out:
 
 	return ret;
 }
+
+/**
+ * iommu_device_use_default_domain() - Device driver wants to handle device
+ *				       DMA through the kernel DMA API.
+ * @dev: The device.
+ *
+ * The device driver about to bind @dev wants to do DMA through the kernel
+ * DMA API. Return 0 if it is allowed, otherwise an error.
+ */
+int iommu_device_use_default_domain(struct device *dev)
+{
+	struct iommu_group *group = iommu_group_get(dev);
+	int ret = 0;
+
+	if (!group)
+		return 0;
+
+	mutex_lock(&group->mutex);
+	if (group->owner_cnt) {
+		if (group->domain != group->default_domain ||
+		    group->owner) {
+			ret = -EBUSY;
+			goto unlock_out;
+		}
+	}
+
+	group->owner_cnt++;
+
+unlock_out:
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
+
+	return ret;
+}
+
+/**
+ * iommu_device_unuse_default_domain() - Device driver stops handling device
+ *					 DMA through the kernel DMA API.
+ * @dev: The device.
+ *
+ * The device driver doesn't want to do DMA through kernel DMA API anymore.
+ * It must be called after iommu_device_use_default_domain().
+ */
+void iommu_device_unuse_default_domain(struct device *dev)
+{
+	struct iommu_group *group = iommu_group_get(dev);
+
+	if (!group)
+		return;
+
+	mutex_lock(&group->mutex);
+	if (!WARN_ON(!group->owner_cnt))
+		group->owner_cnt--;
+
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
+}
+
+static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
+{
+	struct group_device *dev =
+		list_first_entry(&group->devices, struct group_device, list);
+
+	if (group->blocking_domain)
+		return 0;
+
+	group->blocking_domain =
+		__iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED);
+	if (!group->blocking_domain) {
+		/*
+		 * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED
+		 * create an empty domain instead.
+		 */
+		group->blocking_domain = __iommu_domain_alloc(
+			dev->dev->bus, IOMMU_DOMAIN_UNMANAGED);
+		if (!group->blocking_domain)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * iommu_group_claim_dma_owner() - Set DMA ownership of a group
+ * @group: The group.
+ * @owner: Caller specified pointer. Used for exclusive ownership.
+ *
+ * This is to support backward compatibility for vfio which manages
+ * the dma ownership in iommu_group level. New invocations on this
+ * interface should be prohibited.
+ */
+int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
+{
+	int ret = 0;
+
+	mutex_lock(&group->mutex);
+	if (group->owner_cnt) {
+		ret = -EPERM;
+		goto unlock_out;
+	} else {
+		if (group->domain && group->domain != group->default_domain) {
+			ret = -EBUSY;
+			goto unlock_out;
+		}
+
+		ret = __iommu_group_alloc_blocking_domain(group);
+		if (ret)
+			goto unlock_out;
+
+		ret = __iommu_group_set_domain(group, group->blocking_domain);
+		if (ret)
+			goto unlock_out;
+		group->owner = owner;
+	}
+
+	group->owner_cnt++;
+unlock_out:
+	mutex_unlock(&group->mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
+
+/**
+ * iommu_group_release_dma_owner() - Release DMA ownership of a group
+ * @group: The group.
+ *
+ * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
+ */
+void iommu_group_release_dma_owner(struct iommu_group *group)
+{
+	int ret;
+
+	mutex_lock(&group->mutex);
+	if (WARN_ON(!group->owner_cnt || !group->owner))
+		goto unlock_out;
+
+	group->owner_cnt = 0;
+	group->owner = NULL;
+	ret = __iommu_group_set_domain(group, group->default_domain);
+	WARN(ret, "iommu driver failed to attach the default domain");
+
+unlock_out:
+	mutex_unlock(&group->mutex);
+}
+EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
+
+/**
+ * iommu_group_dma_owner_claimed() - Query group dma ownership status
+ * @group: The group.
+ *
+ * This provides status query on a given group. It is racy and only for
+ * non-binding status reporting.
+ */
+bool iommu_group_dma_owner_claimed(struct iommu_group *group)
+{
+	unsigned int user;
+
+	mutex_lock(&group->mutex);
+	user = group->owner_cnt;
+	mutex_unlock(&group->mutex);
+
+	return user;
+}
+EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 4ceeb75fc899..f83f7fbac68f 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -20,6 +20,7 @@
 #include <linux/of_device.h>
 #include <linux/acpi.h>
 #include <linux/dma-map-ops.h>
+#include <linux/iommu.h>
 #include "pci.h"
 #include "pcie/portdrv.h"
 
@@ -1601,6 +1602,7 @@ static int pci_bus_num_vf(struct device *dev)
  */
 static int pci_dma_configure(struct device *dev)
 {
+	struct pci_driver *driver = to_pci_driver(dev->driver);
 	struct device *bridge;
 	int ret = 0;
 
@@ -1616,9 +1618,24 @@ static int pci_dma_configure(struct device *dev)
 	}
 
 	pci_put_host_bridge_device(bridge);
+
+	if (!ret && !driver->driver_managed_dma) {
+		ret = iommu_device_use_default_domain(dev);
+		if (ret)
+			arch_teardown_dma_ops(dev);
+	}
+
 	return ret;
 }
 
+static void pci_dma_cleanup(struct device *dev)
+{
+	struct pci_driver *driver = to_pci_driver(dev->driver);
+
+	if (!driver->driver_managed_dma)
+		iommu_device_unuse_default_domain(dev);
+}
+
 struct bus_type pci_bus_type = {
 	.name		= "pci",
 	.match		= pci_bus_match,
@@ -1632,6 +1649,7 @@ struct bus_type pci_bus_type = {
 	.pm		= PCI_PM_OPS_PTR,
 	.num_vf		= pci_bus_num_vf,
 	.dma_configure	= pci_dma_configure,
+	.dma_cleanup	= pci_dma_cleanup,
 };
 EXPORT_SYMBOL(pci_bus_type);
 
diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c
index e408099fea52..d1f4c1ce7bd1 100644
--- a/drivers/pci/pci-stub.c
+++ b/drivers/pci/pci-stub.c
@@ -36,6 +36,7 @@ static struct pci_driver stub_driver = {
 	.name		= "pci-stub",
 	.id_table	= NULL,	/* only dynamic id's */
 	.probe		= pci_stub_probe,
+	.driver_managed_dma = true,
 };
 
 static int __init pci_stub_init(void)
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 4b8801656ffb..7f8788a970ae 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -202,6 +202,8 @@ static struct pci_driver pcie_portdriver = {
 
 	.err_handler	= &pcie_portdrv_err_handler,
 
+	.driver_managed_dma = true,
+
 	.driver.pm	= PCIE_PORTDRV_PM_OPS,
 };
 
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 6e2e62c6f47a..3feff729f3ce 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -588,6 +588,7 @@ static struct fsl_mc_driver vfio_fsl_mc_driver = {
 		.name	= "vfio-fsl-mc",
 		.owner	= THIS_MODULE,
 	},
+	.driver_managed_dma = true,
 };
 
 static int __init vfio_fsl_mc_driver_init(void)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index cdee5f81f49d..4d1a97415a27 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -196,6 +196,7 @@ static struct pci_driver vfio_pci_driver = {
 	.remove			= vfio_pci_remove,
 	.sriov_configure	= vfio_pci_sriov_configure,
 	.err_handler		= &vfio_pci_core_err_handlers,
+	.driver_managed_dma	= true,
 };
 
 static void __init vfio_pci_fill_ids(void)
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index badfffea14fb..1aaa4f721bd2 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -95,6 +95,7 @@ static struct amba_driver vfio_amba_driver = {
 		.name = "vfio-amba",
 		.owner = THIS_MODULE,
 	},
+	.driver_managed_dma = true,
 };
 
 module_amba_driver(vfio_amba_driver);
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 68a1c87066d7..04f40c5acfd6 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -76,6 +76,7 @@ static struct platform_driver vfio_platform_driver = {
 	.driver	= {
 		.name	= "vfio-platform",
 	},
+	.driver_managed_dma = true,
 };
 
 module_platform_driver(vfio_platform_driver);
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 56b80c39a3c0..cc0d337f7b13 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -62,11 +62,6 @@ struct vfio_container {
 	bool				noiommu;
 };
 
-struct vfio_unbound_dev {
-	struct device			*dev;
-	struct list_head		unbound_next;
-};
-
 struct vfio_group {
 	struct device 			dev;
 	struct cdev			cdev;
@@ -76,11 +71,8 @@ struct vfio_group {
 	struct vfio_container		*container;
 	struct list_head		device_list;
 	struct mutex			device_lock;
-	struct notifier_block		nb;
 	struct list_head		vfio_next;
 	struct list_head		container_next;
-	struct list_head		unbound_list;
-	struct mutex			unbound_lock;
 	atomic_t			opened;
 	wait_queue_head_t		container_q;
 	enum vfio_group_type		type;
@@ -281,8 +273,6 @@ void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
 }
 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
 
-static int vfio_iommu_group_notifier(struct notifier_block *nb,
-				     unsigned long action, void *data);
 static void vfio_group_get(struct vfio_group *group);
 
 /*
@@ -340,16 +330,8 @@ vfio_group_get_from_iommu(struct iommu_group *iommu_group)
 static void vfio_group_release(struct device *dev)
 {
 	struct vfio_group *group = container_of(dev, struct vfio_group, dev);
-	struct vfio_unbound_dev *unbound, *tmp;
-
-	list_for_each_entry_safe(unbound, tmp,
-				 &group->unbound_list, unbound_next) {
-		list_del(&unbound->unbound_next);
-		kfree(unbound);
-	}
 
 	mutex_destroy(&group->device_lock);
-	mutex_destroy(&group->unbound_lock);
 	iommu_group_put(group->iommu_group);
 	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
 	kfree(group);
@@ -381,8 +363,6 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
 	refcount_set(&group->users, 1);
 	INIT_LIST_HEAD(&group->device_list);
 	mutex_init(&group->device_lock);
-	INIT_LIST_HEAD(&group->unbound_list);
-	mutex_init(&group->unbound_lock);
 	init_waitqueue_head(&group->container_q);
 	group->iommu_group = iommu_group;
 	/* put in vfio_group_release() */
@@ -412,13 +392,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 		goto err_put;
 	}
 
-	group->nb.notifier_call = vfio_iommu_group_notifier;
-	err = iommu_group_register_notifier(iommu_group, &group->nb);
-	if (err) {
-		ret = ERR_PTR(err);
-		goto err_put;
-	}
-
 	mutex_lock(&vfio.group_lock);
 
 	/* Did we race creating this group? */
@@ -439,7 +412,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 
 err_unlock:
 	mutex_unlock(&vfio.group_lock);
-	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
 err_put:
 	put_device(&group->dev);
 	return ret;
@@ -464,7 +436,6 @@ static void vfio_group_put(struct vfio_group *group)
 	cdev_device_del(&group->cdev, &group->dev);
 	mutex_unlock(&vfio.group_lock);
 
-	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
 	put_device(&group->dev);
 }
 
@@ -505,175 +476,6 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
 }
 
 /*
- * Some drivers, like pci-stub, are only used to prevent other drivers from
- * claiming a device and are therefore perfectly legitimate for a user owned
- * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
- * of the device, but it does prevent the user from having direct access to
- * the device, which is useful in some circumstances.
- *
- * We also assume that we can include PCI interconnect devices, ie. bridges.
- * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
- * then all of the downstream devices will be part of the same IOMMU group as
- * the bridge.  Thus, if placing the bridge into the user owned IOVA space
- * breaks anything, it only does so for user owned devices downstream.  Note
- * that error notification via MSI can be affected for platforms that handle
- * MSI within the same IOVA space as DMA.
- */
-static const char * const vfio_driver_allowed[] = { "pci-stub" };
-
-static bool vfio_dev_driver_allowed(struct device *dev,
-				    struct device_driver *drv)
-{
-	if (dev_is_pci(dev)) {
-		struct pci_dev *pdev = to_pci_dev(dev);
-
-		if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
-			return true;
-	}
-
-	return match_string(vfio_driver_allowed,
-			    ARRAY_SIZE(vfio_driver_allowed),
-			    drv->name) >= 0;
-}
-
-/*
- * A vfio group is viable for use by userspace if all devices are in
- * one of the following states:
- *  - driver-less
- *  - bound to a vfio driver
- *  - bound to an otherwise allowed driver
- *  - a PCI interconnect device
- *
- * We use two methods to determine whether a device is bound to a vfio
- * driver.  The first is to test whether the device exists in the vfio
- * group.  The second is to test if the device exists on the group
- * unbound_list, indicating it's in the middle of transitioning from
- * a vfio driver to driver-less.
- */
-static int vfio_dev_viable(struct device *dev, void *data)
-{
-	struct vfio_group *group = data;
-	struct vfio_device *device;
-	struct device_driver *drv = READ_ONCE(dev->driver);
-	struct vfio_unbound_dev *unbound;
-	int ret = -EINVAL;
-
-	mutex_lock(&group->unbound_lock);
-	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
-		if (dev == unbound->dev) {
-			ret = 0;
-			break;
-		}
-	}
-	mutex_unlock(&group->unbound_lock);
-
-	if (!ret || !drv || vfio_dev_driver_allowed(dev, drv))
-		return 0;
-
-	device = vfio_group_get_device(group, dev);
-	if (device) {
-		vfio_device_put(device);
-		return 0;
-	}
-
-	return ret;
-}
-
-/*
- * Async device support
- */
-static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
-{
-	struct vfio_device *device;
-
-	/* Do we already know about it?  We shouldn't */
-	device = vfio_group_get_device(group, dev);
-	if (WARN_ON_ONCE(device)) {
-		vfio_device_put(device);
-		return 0;
-	}
-
-	/* Nothing to do for idle groups */
-	if (!atomic_read(&group->container_users))
-		return 0;
-
-	/* TODO Prevent device auto probing */
-	dev_WARN(dev, "Device added to live group %d!\n",
-		 iommu_group_id(group->iommu_group));
-
-	return 0;
-}
-
-static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
-{
-	/* We don't care what happens when the group isn't in use */
-	if (!atomic_read(&group->container_users))
-		return 0;
-
-	return vfio_dev_viable(dev, group);
-}
-
-static int vfio_iommu_group_notifier(struct notifier_block *nb,
-				     unsigned long action, void *data)
-{
-	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
-	struct device *dev = data;
-	struct vfio_unbound_dev *unbound;
-
-	switch (action) {
-	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
-		vfio_group_nb_add_dev(group, dev);
-		break;
-	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
-		/*
-		 * Nothing to do here.  If the device is in use, then the
-		 * vfio sub-driver should block the remove callback until
-		 * it is unused.  If the device is unused or attached to a
-		 * stub driver, then it should be released and we don't
-		 * care that it will be going away.
-		 */
-		break;
-	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
-		dev_dbg(dev, "%s: group %d binding to driver\n", __func__,
-			iommu_group_id(group->iommu_group));
-		break;
-	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
-		dev_dbg(dev, "%s: group %d bound to driver %s\n", __func__,
-			iommu_group_id(group->iommu_group), dev->driver->name);
-		BUG_ON(vfio_group_nb_verify(group, dev));
-		break;
-	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
-		dev_dbg(dev, "%s: group %d unbinding from driver %s\n",
-			__func__, iommu_group_id(group->iommu_group),
-			dev->driver->name);
-		break;
-	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
-		dev_dbg(dev, "%s: group %d unbound from driver\n", __func__,
-			iommu_group_id(group->iommu_group));
-		/*
-		 * XXX An unbound device in a live group is ok, but we'd
-		 * really like to avoid the above BUG_ON by preventing other
-		 * drivers from binding to it.  Once that occurs, we have to
-		 * stop the system to maintain isolation.  At a minimum, we'd
-		 * want a toggle to disable driver auto probe for this device.
-		 */
-
-		mutex_lock(&group->unbound_lock);
-		list_for_each_entry(unbound,
-				    &group->unbound_list, unbound_next) {
-			if (dev == unbound->dev) {
-				list_del(&unbound->unbound_next);
-				kfree(unbound);
-				break;
-			}
-		}
-		mutex_unlock(&group->unbound_lock);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-/*
  * VFIO driver API
  */
 void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
@@ -850,29 +652,10 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
 void vfio_unregister_group_dev(struct vfio_device *device)
 {
 	struct vfio_group *group = device->group;
-	struct vfio_unbound_dev *unbound;
 	unsigned int i = 0;
 	bool interrupted = false;
 	long rc;
 
-	/*
-	 * When the device is removed from the group, the group suddenly
-	 * becomes non-viable; the device has a driver (until the unbind
-	 * completes), but it's not present in the group.  This is bad news
-	 * for any external users that need to re-acquire a group reference
-	 * in order to match and release their existing reference.  To
-	 * solve this, we track such devices on the unbound_list to bridge
-	 * the gap until they're fully unbound.
-	 */
-	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
-	if (unbound) {
-		unbound->dev = device->dev;
-		mutex_lock(&group->unbound_lock);
-		list_add(&unbound->unbound_next, &group->unbound_list);
-		mutex_unlock(&group->unbound_lock);
-	}
-	WARN_ON(!unbound);
-
 	vfio_device_put(device);
 	rc = try_wait_for_completion(&device->comp);
 	while (rc <= 0) {
@@ -1159,6 +942,8 @@ static void __vfio_group_unset_container(struct vfio_group *group)
 		driver->ops->detach_group(container->iommu_data,
 					  group->iommu_group);
 
+	iommu_group_release_dma_owner(group->iommu_group);
+
 	group->container = NULL;
 	wake_up(&group->container_q);
 	list_del(&group->container_next);
@@ -1243,13 +1028,19 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 		goto unlock_out;
 	}
 
+	ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
+	if (ret)
+		goto unlock_out;
+
 	driver = container->iommu_driver;
 	if (driver) {
 		ret = driver->ops->attach_group(container->iommu_data,
 						group->iommu_group,
 						group->type);
-		if (ret)
+		if (ret) {
+			iommu_group_release_dma_owner(group->iommu_group);
 			goto unlock_out;
+		}
 	}
 
 	group->container = container;
@@ -1266,12 +1057,6 @@ unlock_out:
 	return ret;
 }
 
-static bool vfio_group_viable(struct vfio_group *group)
-{
-	return (iommu_group_for_each_dev(group->iommu_group,
-					 group, vfio_dev_viable) == 0);
-}
-
 static int vfio_group_add_container_user(struct vfio_group *group)
 {
 	if (!atomic_inc_not_zero(&group->container_users))
@@ -1281,7 +1066,7 @@ static int vfio_group_add_container_user(struct vfio_group *group)
 		atomic_dec(&group->container_users);
 		return -EPERM;
 	}
-	if (!group->container->iommu_driver || !vfio_group_viable(group)) {
+	if (!group->container->iommu_driver) {
 		atomic_dec(&group->container_users);
 		return -EINVAL;
 	}
@@ -1305,7 +1090,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 	int ret = 0;
 
 	if (0 == atomic_read(&group->container_users) ||
-	    !group->container->iommu_driver || !vfio_group_viable(group))
+	    !group->container->iommu_driver)
 		return -EINVAL;
 
 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
@@ -1397,11 +1182,11 @@ static long vfio_group_fops_unl_ioctl(struct file *filep,
 
 		status.flags = 0;
 
-		if (vfio_group_viable(group))
-			status.flags |= VFIO_GROUP_FLAGS_VIABLE;
-
 		if (group->container)
-			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;
+			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
+					VFIO_GROUP_FLAGS_VIABLE;
+		else if (!iommu_group_dma_owner_claimed(group->iommu_group))
+			status.flags |= VFIO_GROUP_FLAGS_VIABLE;
 
 		if (copy_to_user((void __user *)arg, &status, minsz))
 			return -EFAULT;
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 6562f543c3e0..2ddce9bcd00e 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -79,6 +79,14 @@ struct amba_driver {
 	void			(*remove)(struct amba_device *);
 	void			(*shutdown)(struct amba_device *);
 	const struct amba_id	*id_table;
+	/*
+	 * For most device drivers, no need to care about this flag as long as
+	 * all DMAs are handled through the kernel DMA API. For some special
+	 * ones, for example VFIO drivers, they know how to manage the DMA
+	 * themselves and set this flag so that the IOMMU layer will allow them
+	 * to setup and manage their own I/O address space.
+	 */
+	bool driver_managed_dma;
 };
 
 /*
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index a039ab809753..d8b29ccd07e5 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -59,6 +59,8 @@ struct fwnode_handle;
 *		bus supports.
 * @dma_configure:	Called to setup DMA configuration on a device on
 *			this bus.
+* @dma_cleanup:	Called to cleanup DMA configuration on a device on
+*			this bus.
 * @pm:		Power management operations of this bus, callback the specific
 *		device driver's pm-ops.
 * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
@@ -103,6 +105,7 @@ struct bus_type {
 	int (*num_vf)(struct device *dev);
 
 	int (*dma_configure)(struct device *dev);
+	void (*dma_cleanup)(struct device *dev);
 
 	const struct dev_pm_ops *pm;
 
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 7b6c42bfb660..27efef8affb1 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -32,6 +32,13 @@ struct fsl_mc_io;
 * @shutdown: Function called at shutdown time to quiesce the device
 * @suspend: Function called when a device is stopped
 * @resume: Function called when a device is resumed
+* @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA.
+*		For most device drivers, no need to care about this flag
+*		as long as all DMAs are handled through the kernel DMA API.
+*		For some special ones, for example VFIO drivers, they know
+*		how to manage the DMA themselves and set this flag so that
+*		the IOMMU layer will allow them to setup and manage their
+*		own I/O address space.
 *
 * Generic DPAA device driver object for device drivers that are registered
 * with a DPRC bus. This structure is to be embedded in each device-specific
@@ -45,6 +52,7 @@ struct fsl_mc_driver {
 	void (*shutdown)(struct fsl_mc_device *dev);
 	int (*suspend)(struct fsl_mc_device *dev, pm_message_t state);
 	int (*resume)(struct fsl_mc_device *dev);
+	bool driver_managed_dma;
 };
 
 #define to_fsl_mc_driver(_drv) \
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 9208eca4b0d1..6ef2df258673 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -407,13 +407,6 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
 	return dev->iommu->iommu_dev->ops;
 }
 
-#define IOMMU_GROUP_NOTIFY_ADD_DEVICE		1 /* Device added */
-#define IOMMU_GROUP_NOTIFY_DEL_DEVICE		2 /* Pre Device removed */
-#define IOMMU_GROUP_NOTIFY_BIND_DRIVER		3 /* Pre Driver bind */
-#define IOMMU_GROUP_NOTIFY_BOUND_DRIVER		4 /* Post Driver bind */
-#define IOMMU_GROUP_NOTIFY_UNBIND_DRIVER	5 /* Pre Driver unbind */
-#define IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER	6 /* Post Driver unbind */
-
 extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops);
 extern int bus_iommu_probe(struct bus_type *bus);
 extern bool iommu_present(struct bus_type *bus);
@@ -478,10 +471,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data,
 extern struct iommu_group *iommu_group_get(struct device *dev);
 extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group);
 extern void iommu_group_put(struct iommu_group *group);
-extern int iommu_group_register_notifier(struct iommu_group *group,
-					 struct notifier_block *nb);
-extern int iommu_group_unregister_notifier(struct iommu_group *group,
-					   struct notifier_block *nb);
 extern int iommu_register_device_fault_handler(struct device *dev,
 					iommu_dev_fault_handler_t handler,
 					void *data);
@@ -675,6 +664,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
 void iommu_sva_unbind_device(struct iommu_sva *handle);
 u32 iommu_sva_get_pasid(struct iommu_sva *handle);
 
+int iommu_device_use_default_domain(struct device *dev);
+void iommu_device_unuse_default_domain(struct device *dev);
+
+int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);
+void iommu_group_release_dma_owner(struct iommu_group *group);
+bool iommu_group_dma_owner_claimed(struct iommu_group *group);
+
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
@@ -871,18 +867,6 @@ static inline void iommu_group_put(struct iommu_group *group)
 {
 }
 
-static inline int iommu_group_register_notifier(struct iommu_group *group,
-						struct notifier_block *nb)
-{
-	return -ENODEV;
-}
-
-static inline int iommu_group_unregister_notifier(struct iommu_group *group,
-						  struct notifier_block *nb)
-{
-	return 0;
-}
-
 static inline
 int iommu_register_device_fault_handler(struct device *dev,
 					iommu_dev_fault_handler_t handler,
@@ -1031,6 +1015,30 @@ static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
 {
 	return NULL;
 }
+
+static inline int iommu_device_use_default_domain(struct device *dev)
+{
+	return 0;
+}
+
+static inline void iommu_device_unuse_default_domain(struct device *dev)
+{
+}
+
+static inline int
+iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_group_release_dma_owner(struct iommu_group *group)
+{
+}
+
+static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
+{
+	return false;
+}
 #endif /* CONFIG_IOMMU_API */
 
 /**
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 60adf42460ab..b933d2b08d4d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -895,6 +895,13 @@ struct module;
 *		created once it is bound to the driver.
 * @driver:	Driver model structure.
 * @dynids:	List of dynamically added device IDs.
+* @driver_managed_dma: Device driver doesn't use kernel DMA API for DMA.
+*		For most device drivers, no need to care about this flag
+*		as long as all DMAs are handled through the kernel DMA API.
+*		For some special ones, for example VFIO drivers, they know
+*		how to manage the DMA themselves and set this flag so that
+*		the IOMMU layer will allow them to setup and manage their
+*		own I/O address space.
 */
 struct pci_driver {
 	struct list_head	node;
@@ -913,6 +920,7 @@ struct pci_driver {
 	const struct attribute_group **dev_groups;
 	struct device_driver	driver;
 	struct pci_dynids	dynids;
+	bool driver_managed_dma;
 };
 
 static inline struct pci_driver *to_pci_driver(struct device_driver *drv)
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 7c96f169d274..b3d9c744f1e5 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -210,6 +210,14 @@ struct platform_driver {
 	struct device_driver driver;
 	const struct platform_device_id *id_table;
 	bool prevent_deferred_probe;
+	/*
+	 * For most device drivers, no need to care about this flag as long as
+	 * all DMAs are handled through the kernel DMA API. For some special
+	 * ones, for example VFIO drivers, they know how to manage the DMA
+	 * themselves and set this flag so that the IOMMU layer will allow them
+	 * to setup and manage their own I/O address space.
+	 */
+	bool driver_managed_dma;
 };
 
 #define to_platform_driver(drv)	(container_of((drv), struct platform_driver, \
@@ -328,8 +336,6 @@ extern int platform_pm_restore(struct device *dev);
 #define platform_pm_restore NULL
 #endif
 
-extern int platform_dma_configure(struct device *dev);
-
 #ifdef CONFIG_PM_SLEEP
 #define USE_PLATFORM_PM_SLEEP_OPS \
 	.suspend = platform_pm_suspend, \