From ded467374a34eb80020c2213456b1d9ca946b88c Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 6 Apr 2011 10:53:48 +0200
Subject: x86/amd-iommu: Move compl-wait command building to own function

This patch introduces a separate function for building
completion-wait commands.

Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 57ca77787220..eebd504519c6 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -383,6 +383,13 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 *
 ****************************************************************************/
 
+static void build_completion_wait(struct iommu_cmd *cmd)
+{
+        memset(cmd, 0, sizeof(*cmd));
+        cmd->data[0] = CMD_COMPL_WAIT_INT_MASK;
+        CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command. Must be called with iommu->lock held.
@@ -458,9 +465,7 @@ static int __iommu_completion_wait(struct amd_iommu *iommu)
 {
         struct iommu_cmd cmd;
 
-        memset(&cmd, 0, sizeof(cmd));
-        cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
-        CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+        build_completion_wait(&cmd);
 
         return __iommu_queue_command(iommu, &cmd);
 }
-- cgit v1.2.3

From 94fe79e2f100bfcd8e7689cbf8838634779b80a2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 6 Apr 2011 11:07:21 +0200
Subject: x86/amd-iommu: Move inv-dte command building to own function

This patch moves command building for the invalidate-dte
command into its own function.

Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index eebd504519c6..4e5631a433aa 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -390,6 +390,13 @@ static void build_completion_wait(struct iommu_cmd *cmd)
         CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 }
 
+static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
+{
+        memset(cmd, 0, sizeof(*cmd));
+        cmd->data[0] = devid;
+        CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command. Must be called with iommu->lock held.
@@ -533,10 +540,7 @@ static int iommu_flush_device(struct device *dev)
         devid = get_device_id(dev);
         iommu = amd_iommu_rlookup_table[devid];
 
-        /* Build command */
-        memset(&cmd, 0, sizeof(cmd));
-        CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
-        cmd.data[0] = devid;
+        build_inv_dte(&cmd, devid);
 
         return iommu_queue_command(iommu, &cmd);
 }
-- cgit v1.2.3

From 11b6402c6673b530fac9920c5640c75e99fee956 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 6 Apr 2011 11:49:28 +0200
Subject: x86/amd-iommu: Cleanup inv_pages command handling

This patch reworks the processing of invalidate-pages commands to the
IOMMU. The function building the command is extended so we can get rid
of another function. It was also renamed to match the other function
names.
Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 83 ++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 47 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 4e5631a433aa..f8ec28ea3314 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -397,6 +397,37 @@ static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
         CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
 }
 
+static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+                                  size_t size, u16 domid, int pde)
+{
+        u64 pages;
+        int s;
+
+        pages = iommu_num_pages(address, size, PAGE_SIZE);
+        s = 0;
+
+        if (pages > 1) {
+                /*
+                 * If we have to flush more than one page, flush all
+                 * TLB entries for this domain
+                 */
+                address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+                s = 1;
+        }
+
+        address &= PAGE_MASK;
+
+        memset(cmd, 0, sizeof(*cmd));
+        cmd->data[1] |= domid;
+        cmd->data[2] = lower_32_bits(address);
+        cmd->data[3] = upper_32_bits(address);
+        CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+        if (s) /* size bit - we flush more than one 4kb page */
+                cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+        if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
+                cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command. Must be called with iommu->lock held.
@@ -545,37 +576,6 @@ static int iommu_flush_device(struct device *dev)
         return iommu_queue_command(iommu, &cmd);
 }
 
-static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
-                                          u16 domid, int pde, int s)
-{
-        memset(cmd, 0, sizeof(*cmd));
-        address &= PAGE_MASK;
-        CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
-        cmd->data[1] |= domid;
-        cmd->data[2] = lower_32_bits(address);
-        cmd->data[3] = upper_32_bits(address);
-        if (s) /* size bit - we flush more than one 4kb page */
-                cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-        if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-                cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
-}
-
-/*
- * Generic command send function for invalidaing TLB entries
- */
-static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
-                                       u64 address, u16 domid, int pde, int s)
-{
-        struct iommu_cmd cmd;
-        int ret;
-
-        __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
-
-        ret = iommu_queue_command(iommu, &cmd);
-
-        return ret;
-}
-
 /*
  * TLB invalidation function which is called from the mapping functions.
  * It invalidates a single PTE if the range to flush is within a single
@@ -584,20 +584,10 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 static void __iommu_flush_pages(struct protection_domain *domain,
                                 u64 address, size_t size, int pde)
 {
-        int s = 0, i;
-        unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);
-
-        address &= PAGE_MASK;
-
-        if (pages > 1) {
-                /*
-                 * If we have to flush more than one page, flush all
-                 * TLB entries for this domain
-                 */
-                address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
-                s = 1;
-        }
+        struct iommu_cmd cmd;
+        int ret = 0, i;
 
+        build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
 
         for (i = 0; i < amd_iommus_present; ++i) {
                 if (!domain->dev_iommu[i])
@@ -607,11 +597,10 @@ static void __iommu_flush_pages(struct protection_domain *domain,
                  * Devices of this domain are behind this IOMMU
                  * We need a TLB flush
                  */
-                iommu_queue_inv_iommu_pages(amd_iommus[i], address,
-                                            domain->id, pde, s);
+                ret |= iommu_queue_command(amd_iommus[i], &cmd);
         }
 
-        return;
+        WARN_ON(ret);
 }
 
 static void iommu_flush_pages(struct protection_domain *domain,
-- cgit v1.2.3

From 815b33fdc279d34ab40a8bfe1866623a4cc5669b Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 6 Apr 2011 17:26:49 +0200
Subject: x86/amd-iommu: Cleanup completion-wait handling

This patch cleans up the implementation of completion-wait command
sending. It also switches the completion indicator from the MMIO bit
to a memory store which can be checked without IOMMU locking. As a
side effect this patch makes the __iommu_queue_command function
obsolete and so it is removed too.

Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 107 ++++++++++++--------------------------------
 1 file changed, 28 insertions(+), 79 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f8ec28ea3314..073c64b1994b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -34,7 +35,7 @@
 
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
-#define EXIT_LOOP_COUNT 10000000
+#define LOOP_TIMEOUT 100000
 
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 
@@ -383,10 +384,14 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 *
 ****************************************************************************/
 
-static void build_completion_wait(struct iommu_cmd *cmd)
+static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
 {
+        WARN_ON(address & 0x7ULL);
+
         memset(cmd, 0, sizeof(*cmd));
-        cmd->data[0] = CMD_COMPL_WAIT_INT_MASK;
+        cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
+        cmd->data[1] = upper_32_bits(__pa(address));
+        cmd->data[2] = 1;
         CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 }
 
@@ -432,12 +437,14 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command. Must be called with iommu->lock held.
  */
-static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
+static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
+        unsigned long flags;
         u32 tail, head;
         u8 *target;
 
         WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
+        spin_lock_irqsave(&iommu->lock, flags);
         tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
         target = iommu->cmd_buf + tail;
         memcpy_toio(target, cmd, sizeof(*cmd));
         tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
         head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
         if (tail == head)
                 return -ENOMEM;
         writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-
-        return 0;
-}
-
-/*
- * General queuing function for commands. Takes iommu->lock and calls
- * __iommu_queue_command().
- */
-static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
-{
-        unsigned long flags;
-        int ret;
-
-        spin_lock_irqsave(&iommu->lock, flags);
-        ret = __iommu_queue_command(iommu, cmd);
-        if (!ret)
-                iommu->need_sync = true;
+        iommu->need_sync = true;
         spin_unlock_irqrestore(&iommu->lock, flags);
 
-        return ret;
-}
-
-/*
- * This function waits until an IOMMU has completed a completion
- * wait command
- */
-static void __iommu_wait_for_completion(struct amd_iommu *iommu)
-{
-        int ready = 0;
-        unsigned status = 0;
-        unsigned long i = 0;
-
-        INC_STATS_COUNTER(compl_wait);
-
-        while (!ready && (i < EXIT_LOOP_COUNT)) {
-                ++i;
-                /* wait for the bit to become one */
-                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-                ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
-        }
-
-        /* set bit back to zero */
-        status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
-        writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
-        if (unlikely(i == EXIT_LOOP_COUNT))
-                iommu->reset_in_progress = true;
+        return 0;
 }
 
 /*
  * This function queues a completion wait command into the command
  * buffer of an IOMMU
  */
-static int __iommu_completion_wait(struct amd_iommu *iommu)
-{
-        struct iommu_cmd cmd;
-
-        build_completion_wait(&cmd);
-
-        return __iommu_queue_command(iommu, &cmd);
-}
-
-/*
- * This function is called whenever we need to ensure that the IOMMU has
- * completed execution of all commands we sent. It sends a
- * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
- * us about that by writing a value to a physical address we pass with
- * the command.
- */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
-        int ret = 0;
-        unsigned long flags;
-
-        spin_lock_irqsave(&iommu->lock, flags);
+        struct iommu_cmd cmd;
+        volatile u64 sem = 0;
+        int ret, i = 0;
 
         if (!iommu->need_sync)
-                goto out;
-
-        ret = __iommu_completion_wait(iommu);
+                return 0;
 
-        iommu->need_sync = false;
+        build_completion_wait(&cmd, (u64)&sem);
+        ret = iommu_queue_command(iommu, &cmd);
         if (ret)
-                goto out;
-
-        __iommu_wait_for_completion(iommu);
+                return ret;
 
-out:
-        spin_unlock_irqrestore(&iommu->lock, flags);
+        while (sem == 0 && i < LOOP_TIMEOUT) {
+                udelay(1);
+                i += 1;
+        }
 
-        if (iommu->reset_in_progress)
+        if (i == LOOP_TIMEOUT) {
+                pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
+                iommu->reset_in_progress = true;
                 reset_iommu_command_buffer(iommu);
+        }
 
         return 0;
 }
-- cgit v1.2.3

From 61985a040f17c03b09a2772508ee02729571365b Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 6 Apr 2011 17:46:49 +0200
Subject: x86/amd-iommu: Remove command buffer resetting logic

The logic to reset the command buffer caused more problems than it
actually helped.
The logic kicked in when the IOMMU hardware stopped executing
commands, but the underlying causes of such a hang are usually not
fixed by simply resetting the command buffer. So the code can be
removed to reduce complexity.

Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 073c64b1994b..0147c5c87aa8 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -58,7 +58,6 @@ struct iommu_cmd {
         u32 data[4];
 };
 
-static void reset_iommu_command_buffer(struct amd_iommu *iommu);
 static void update_domain(struct protection_domain *domain);
 
 /****************************************************************************
@@ -323,8 +322,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
                 break;
         case EVENT_TYPE_ILL_CMD:
                 printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
-                iommu->reset_in_progress = true;
-                reset_iommu_command_buffer(iommu);
                 dump_command(address);
                 break;
         case EVENT_TYPE_CMD_HARD_ERR:
@@ -485,8 +482,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
         if (i == LOOP_TIMEOUT) {
                 pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
-                iommu->reset_in_progress = true;
-                reset_iommu_command_buffer(iommu);
+                ret = -EIO;
         }
 
         return 0;
@@ -628,20 +624,6 @@ void amd_iommu_flush_all_domains(void)
         spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
 }
 
-static void reset_iommu_command_buffer(struct amd_iommu *iommu)
-{
-        pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
-
-        if (iommu->reset_in_progress)
-                panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
-
-        amd_iommu_reset_cmd_buffer(iommu);
-        amd_iommu_flush_all_devices();
-        amd_iommu_flush_all_domains();
-
-        iommu->reset_in_progress = false;
-}
-
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
-- cgit v1.2.3

From 17b124bf1463582005d662d4dd95f037ad863c57 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 6 Apr 2011 18:01:35 +0200
Subject: x86/amd-iommu: Rename iommu_flush* to domain_flush*

These functions all operate on protection domains and not on single
IOMMUs. Reflect that in their names.

Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 87 +++++++++++++++++++++++----------------------
 1 file changed, 44 insertions(+), 43 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0147c5c87aa8..9d66b2092ae1 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -488,22 +488,6 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
         return 0;
 }
 
-static void iommu_flush_complete(struct protection_domain *domain)
-{
-        int i;
-
-        for (i = 0; i < amd_iommus_present; ++i) {
-                if (!domain->dev_iommu[i])
-                        continue;
-
-                /*
-                 * Devices of this domain are behind this IOMMU
-                 * We need to wait for completion of all commands.
-                 */
-                iommu_completion_wait(amd_iommus[i]);
-        }
-}
-
 /*
  * Command send function for invalidating a device table entry
  */
@@ -526,8 +510,8 @@ static int iommu_flush_device(struct device *dev)
  * It invalidates a single PTE if the range to flush is within a single
  * page. Otherwise it flushes the whole TLB of the IOMMU.
  */
-static void __iommu_flush_pages(struct protection_domain *domain,
-                                u64 address, size_t size, int pde)
+static void __domain_flush_pages(struct protection_domain *domain,
+                                 u64 address, size_t size, int pde)
 {
         struct iommu_cmd cmd;
         int ret = 0, i;
@@ -548,29 +532,45 @@ static void __iommu_flush_pages(struct protection_domain *domain,
         WARN_ON(ret);
 }
 
-static void iommu_flush_pages(struct protection_domain *domain,
-                              u64 address, size_t size)
+static void domain_flush_pages(struct protection_domain *domain,
+                               u64 address, size_t size)
 {
-        __iommu_flush_pages(domain, address, size, 0);
+        __domain_flush_pages(domain, address, size, 0);
 }
 
 /* Flush the whole IO/TLB for a given protection domain */
-static void iommu_flush_tlb(struct protection_domain *domain)
+static void domain_flush_tlb(struct protection_domain *domain)
 {
-        __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
+        __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void iommu_flush_tlb_pde(struct protection_domain *domain)
+static void domain_flush_tlb_pde(struct protection_domain *domain)
 {
-        __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+        __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+}
+
+static void domain_flush_complete(struct protection_domain *domain)
+{
+        int i;
+
+        for (i = 0; i < amd_iommus_present; ++i) {
+                if (!domain->dev_iommu[i])
+                        continue;
+
+                /*
+                 * Devices of this domain are behind this IOMMU
+                 * We need to wait for completion of all commands.
+                 */
+                iommu_completion_wait(amd_iommus[i]);
+        }
 }
 
 /*
  * This function flushes the DTEs for all devices in domain
  */
-static void iommu_flush_domain_devices(struct protection_domain *domain)
+static void domain_flush_devices(struct protection_domain *domain)
 {
         struct iommu_dev_data *dev_data;
         unsigned long flags;
@@ -591,8 +591,8 @@ static void iommu_flush_all_domain_devices(void)
         spin_lock_irqsave(&amd_iommu_pd_lock, flags);
 
         list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-                iommu_flush_domain_devices(domain);
-                iommu_flush_complete(domain);
+                domain_flush_devices(domain);
+                domain_flush_complete(domain);
         }
 
         spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
@@ -616,8 +616,8 @@ void amd_iommu_flush_all_domains(void)
 
         list_for_each_entry(domain, &amd_iommu_pd_list, list) {
                 spin_lock(&domain->lock);
-                iommu_flush_tlb_pde(domain);
-                iommu_flush_complete(domain);
+                domain_flush_tlb_pde(domain);
+                domain_flush_complete(domain);
                 spin_unlock(&domain->lock);
         }
 
@@ -1480,7 +1480,7 @@ static int attach_device(struct device *dev,
          * left the caches in the IOMMU dirty. So we have to flush
          * here to evict all dirty stuff.
          */
-        iommu_flush_tlb_pde(domain);
+        domain_flush_tlb_pde(domain);
 
         return ret;
 }
@@ -1693,8 +1693,9 @@ static void update_domain(struct protection_domain *domain)
                 return;
 
         update_device_table(domain);
-        iommu_flush_domain_devices(domain);
-        iommu_flush_tlb_pde(domain);
+
+        domain_flush_devices(domain);
+        domain_flush_tlb_pde(domain);
 
         domain->updated = false;
 }
@@ -1853,10 +1854,10 @@ retry:
         ADD_STATS_COUNTER(alloced_io_mem, size);
 
         if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
-                iommu_flush_tlb(&dma_dom->domain);
+                domain_flush_tlb(&dma_dom->domain);
                 dma_dom->need_flush = false;
         } else if (unlikely(amd_iommu_np_cache))
-                iommu_flush_pages(&dma_dom->domain, address, size);
+                domain_flush_pages(&dma_dom->domain, address, size);
 
 out:
         return address;
@@ -1905,7 +1906,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
         dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
         if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-                iommu_flush_pages(&dma_dom->domain, flush_addr, size);
+                domain_flush_pages(&dma_dom->domain, flush_addr, size);
                 dma_dom->need_flush = false;
         }
 }
@@ -1941,7 +1942,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
         if (addr == DMA_ERROR_CODE)
                 goto out;
 
-        iommu_flush_complete(domain);
+        domain_flush_complete(domain);
 
 out:
         spin_unlock_irqrestore(&domain->lock, flags);
@@ -1968,7 +1969,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 
         __unmap_single(domain->priv, dma_addr, size, dir);
 
-        iommu_flush_complete(domain);
+        domain_flush_complete(domain);
 
         spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -2033,7 +2034,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
                         goto unmap;
         }
 
-        iommu_flush_complete(domain);
+        domain_flush_complete(domain);
 
 out:
         spin_unlock_irqrestore(&domain->lock, flags);
@@ -2079,7 +2080,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
                 s->dma_address = s->dma_length = 0;
         }
 
-        iommu_flush_complete(domain);
+        domain_flush_complete(domain);
 
         spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -2129,7 +2130,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
                 goto out_free;
         }
 
-        iommu_flush_complete(domain);
+        domain_flush_complete(domain);
 
         spin_unlock_irqrestore(&domain->lock, flags);
 
@@ -2161,7 +2162,7 @@ static void free_coherent(struct device *dev, size_t size,
 
         __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
-        iommu_flush_complete(domain);
+        domain_flush_complete(domain);
 
         spin_unlock_irqrestore(&domain->lock, flags);
 
@@ -2471,7 +2472,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
         unmap_size = iommu_unmap_page(domain, iova, page_size);
         mutex_unlock(&domain->api_lock);
 
-        iommu_flush_tlb_pde(domain);
+        domain_flush_tlb_pde(domain);
 
         return get_order(unmap_size);
 }
-- cgit v1.2.3

From ac0ea6e92b2227c86fe4f7f9eb429071d617a25d Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 6 Apr 2011 18:38:20 +0200
Subject: x86/amd-iommu: Improve handling of full command buffer

This patch improves the handling of commands when the IOMMU command
buffer is nearly full. In this case it issues a completion-wait
command and waits until the IOMMU has processed it before queuing
further commands.
Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 88 +++++++++++++++++++++++++++++++++------------
 1 file changed, 65 insertions(+), 23 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 9d66b2092ae1..75c7f8c3fe12 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -381,6 +381,39 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 *
 ****************************************************************************/
 
+static int wait_on_sem(volatile u64 *sem)
+{
+        int i = 0;
+
+        while (*sem == 0 && i < LOOP_TIMEOUT) {
+                udelay(1);
+                i += 1;
+        }
+
+        if (i == LOOP_TIMEOUT) {
+                pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
+                return -EIO;
+        }
+
+        return 0;
+}
+
+static void copy_cmd_to_buffer(struct amd_iommu *iommu,
+                               struct iommu_cmd *cmd,
+                               u32 tail)
+{
+        u8 *target;
+
+        target = iommu->cmd_buf + tail;
+        tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+
+        /* Copy command to buffer */
+        memcpy(target, cmd, sizeof(*cmd));
+
+        /* Tell the IOMMU about it */
+        writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+}
+
 static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
 {
         WARN_ON(address & 0x7ULL);
@@ -432,25 +465,44 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
 /*
  * Writes the command to the IOMMUs command buffer and informs the
- * hardware about the new command. Must be called with iommu->lock held.
+ * hardware about the new command.
  */
 static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
+        u32 left, tail, head, next_tail;
         unsigned long flags;
-        u32 tail, head;
-        u8 *target;
 
         WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
+
+again:
         spin_lock_irqsave(&iommu->lock, flags);
-        tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-        target = iommu->cmd_buf + tail;
-        memcpy_toio(target, cmd, sizeof(*cmd));
-        tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
-        head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
-        if (tail == head)
-                return -ENOMEM;
-        writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+
+        head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+        tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+        next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+        left = (head - next_tail) % iommu->cmd_buf_size;
+
+        if (left <= 2) {
+                struct iommu_cmd sync_cmd;
+                volatile u64 sem = 0;
+                int ret;
+
+                build_completion_wait(&sync_cmd, (u64)&sem);
+                copy_cmd_to_buffer(iommu, &sync_cmd, tail);
+
+                spin_unlock_irqrestore(&iommu->lock, flags);
+
+                if ((ret = wait_on_sem(&sem)) != 0)
+                        return ret;
+
+                goto again;
+        }
+
+        copy_cmd_to_buffer(iommu, cmd, tail);
+
+        /* We need to sync now to make sure all commands are processed */
         iommu->need_sync = true;
+
         spin_unlock_irqrestore(&iommu->lock, flags);
 
         return 0;
@@ -464,7 +516,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 {
         struct iommu_cmd cmd;
         volatile u64 sem = 0;
-        int ret, i = 0;
+        int ret;
 
         if (!iommu->need_sync)
                 return 0;
@@ -475,17 +527,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
         if (ret)
                 return ret;
 
-        while (sem == 0 && i < LOOP_TIMEOUT) {
-                udelay(1);
-                i += 1;
-        }
-
-        if (i == LOOP_TIMEOUT) {
-                pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
-                ret = -EIO;
-        }
-
-        return 0;
+        return wait_on_sem(&sem);
 }
 
 /*
-- cgit v1.2.3

From d8c13085775c72e2d46edc54ed0c803c3a944ddb Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 6 Apr 2011 18:51:26 +0200
Subject: x86/amd-iommu: Rename iommu_flush_device
This function operates on a struct device, so give it a name that
represents that.

As a side effect a new function is introduced which operates on an
IOMMU and a device-id. It will be used again in a later patch.

Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 75c7f8c3fe12..3557f223f40b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -530,21 +530,27 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
         return wait_on_sem(&sem);
 }
 
+static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
+{
+        struct iommu_cmd cmd;
+
+        build_inv_dte(&cmd, devid);
+
+        return iommu_queue_command(iommu, &cmd);
+}
+
 /*
  * Command send function for invalidating a device table entry
  */
-static int iommu_flush_device(struct device *dev)
+static int device_flush_dte(struct device *dev)
 {
         struct amd_iommu *iommu;
-        struct iommu_cmd cmd;
         u16 devid;
 
         devid = get_device_id(dev);
         iommu = amd_iommu_rlookup_table[devid];
 
-        build_inv_dte(&cmd, devid);
-
-        return iommu_queue_command(iommu, &cmd);
+        return iommu_flush_dte(iommu, devid);
 }
 
 /*
@@ -620,7 +626,7 @@ static void domain_flush_devices(struct protection_domain *domain)
         spin_lock_irqsave(&domain->lock, flags);
 
         list_for_each_entry(dev_data, &domain->dev_list, list)
-                iommu_flush_device(dev_data->dev);
+                device_flush_dte(dev_data->dev);
 
         spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -1424,7 +1430,7 @@ static void do_attach(struct device *dev, struct protection_domain *domain)
         domain->dev_cnt += 1;
 
         /* Flush the DTE entry */
-        iommu_flush_device(dev);
+        device_flush_dte(dev);
 }
 
 static void do_detach(struct device *dev)
@@ -1447,7 +1453,7 @@ static void do_detach(struct device *dev)
         clear_dte_entry(devid);
 
         /* Flush the DTE entry */
-        iommu_flush_device(dev);
+        device_flush_dte(dev);
 }
 
 /*
@@ -1663,7 +1669,7 @@ static int device_change_notifier(struct notifier_block *nb,
                 goto out;
         }
 
-        iommu_flush_device(dev);
+        device_flush_dte(dev);
         iommu_completion_wait(iommu);
 
 out:
@@ -2448,7 +2454,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
         if (!iommu)
                 return;
 
-        iommu_flush_device(dev);
+        device_flush_dte(dev);
         iommu_completion_wait(iommu);
 }
-- cgit v1.2.3

From 7d0c5cc5be73f7ce26fdcca7b8ec2203f661eb93 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Thu, 7 Apr 2011 08:16:10 +0200
Subject: x86/amd-iommu: Flush all internal TLBs when IOMMUs are enabled

The old code only flushed a DTE or a domain TLB before it was actually
used by the IOMMU driver. While this is efficient and works when done
right, it is more likely to introduce new bugs as the code changes
(which has happened in the past).

This patch adds code to flush all DTEs and all domain TLBs in each
IOMMU right after it is enabled (at boot and after resume). This
reduces the complexity of the driver and makes it less likely to
introduce stale-TLB bugs in the future.
Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 75 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 41 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 3557f223f40b..bcf58ea55cfa 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -539,6 +539,40 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
         return iommu_queue_command(iommu, &cmd);
 }
 
+static void iommu_flush_dte_all(struct amd_iommu *iommu)
+{
+        u32 devid;
+
+        for (devid = 0; devid <= 0xffff; ++devid)
+                iommu_flush_dte(iommu, devid);
+
+        iommu_completion_wait(iommu);
+}
+
+/*
+ * This function uses heavy locking and may disable irqs for some time. But
+ * this is no issue because it is only called during resume.
+ */
+static void iommu_flush_tlb_all(struct amd_iommu *iommu)
+{
+        u32 dom_id;
+
+        for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
+                struct iommu_cmd cmd;
+                build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+                                      dom_id, 1);
+                iommu_queue_command(iommu, &cmd);
+        }
+
+        iommu_completion_wait(iommu);
+}
+
+void iommu_flush_all_caches(struct amd_iommu *iommu)
+{
+        iommu_flush_dte_all(iommu);
+        iommu_flush_tlb_all(iommu);
+}
+
 /*
  * Command send function for invalidating a device table entry
  */
@@ -631,47 +665,6 @@ static void domain_flush_devices(struct protection_domain *domain)
         spin_unlock_irqrestore(&domain->lock, flags);
 }
 
-static void iommu_flush_all_domain_devices(void)
-{
-        struct protection_domain *domain;
-        unsigned long flags;
-
-        spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-
-        list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-                domain_flush_devices(domain);
-                domain_flush_complete(domain);
-        }
-
-        spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
-void amd_iommu_flush_all_devices(void)
-{
-        iommu_flush_all_domain_devices();
-}
-
-/*
- * This function uses heavy locking and may disable irqs for some time. But
- * this is no issue because it is only called during resume.
- */
-void amd_iommu_flush_all_domains(void)
-{
-        struct protection_domain *domain;
-        unsigned long flags;
-
-        spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-
-        list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-                spin_lock(&domain->lock);
-                domain_flush_tlb_pde(domain);
-                domain_flush_complete(domain);
-                spin_unlock(&domain->lock);
-        }
-
-        spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
-- cgit v1.2.3

From cb41ed85efa01e633388314c03a4f3004c6b783b Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Tue, 5 Apr 2011 11:00:53 +0200
Subject: x86/amd-iommu: Flush device IOTLB if ATS is enabled

This patch implements a function to flush the IOTLB on devices
supporting ATS and makes sure that this TLB is also flushed if
necessary.
Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 74 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index bcf58ea55cfa..f3ce4338dade 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,6 +18,7 @@
  */
 
 #include
+#include
 #include
 #include
 #include
@@ -463,6 +464,37 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
         cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 }
 
+static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
+                                  u64 address, size_t size)
+{
+        u64 pages;
+        int s;
+
+        pages = iommu_num_pages(address, size, PAGE_SIZE);
+        s = 0;
+
+        if (pages > 1) {
+                /*
+                 * If we have to flush more than one page, flush all
+                 * TLB entries for this domain
+                 */
+                address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+                s = 1;
+        }
+
+        address &= PAGE_MASK;
+
+        memset(cmd, 0, sizeof(*cmd));
+        cmd->data[0] = devid;
+        cmd->data[0] |= (qdep & 0xff) << 24;
+        cmd->data[1] = devid;
+        cmd->data[2] = lower_32_bits(address);
+        cmd->data[3] = upper_32_bits(address);
+        CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
+        if (s)
+                cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command.
@@ -573,18 +605,48 @@ void iommu_flush_all_caches(struct amd_iommu *iommu)
         iommu_flush_tlb_all(iommu);
 }
 
+/*
+ * Command send function for flushing on-device TLB
+ */
+static int device_flush_iotlb(struct device *dev, u64 address, size_t size)
+{
+        struct pci_dev *pdev = to_pci_dev(dev);
+        struct amd_iommu *iommu;
+        struct iommu_cmd cmd;
+        u16 devid;
+        int qdep;
+
+        qdep = pci_ats_queue_depth(pdev);
+        devid = get_device_id(dev);
+        iommu = amd_iommu_rlookup_table[devid];
+
+        build_inv_iotlb_pages(&cmd, devid, qdep, address, size);
+
+        return iommu_queue_command(iommu, &cmd);
+}
+
 /*
  * Command send function for invalidating a device table entry
  */
 static int device_flush_dte(struct device *dev)
 {
         struct amd_iommu *iommu;
+        struct pci_dev *pdev;
         u16 devid;
+        int ret;
 
+        pdev = to_pci_dev(dev);
         devid = get_device_id(dev);
         iommu = amd_iommu_rlookup_table[devid];
 
-        return iommu_flush_dte(iommu, devid);
+        ret = iommu_flush_dte(iommu, devid);
+        if (ret)
+                return ret;
+
+        if (pci_ats_enabled(pdev))
+                ret = device_flush_iotlb(dev, 0, ~0UL);
+
+        return ret;
 }
 
 /*
@@ -595,6 +657,7 @@ static int device_flush_dte(struct device *dev)
 static void __domain_flush_pages(struct protection_domain *domain,
                                  u64 address, size_t size, int pde)
 {
+        struct iommu_dev_data *dev_data;
         struct iommu_cmd cmd;
         int ret = 0, i;
 
@@ -611,6 +674,15 @@ static void __domain_flush_pages(struct protection_domain *domain,
                 ret |= iommu_queue_command(amd_iommus[i], &cmd);
         }
 
+        list_for_each_entry(dev_data, &domain->dev_list, list) {
+                struct pci_dev *pdev = to_pci_dev(dev_data->dev);
+
+                if (!pci_ats_enabled(pdev))
+                        continue;
+
+                ret |= device_flush_iotlb(dev_data->dev, address, size);
+        }
+
         WARN_ON(ret);
 }
-- cgit v1.2.3

From fd7b5535e10ce820f030842da3f289f80ec0d4f3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Tue, 5 Apr 2011 15:31:08 +0200
Subject: x86/amd-iommu: Add ATS enable/disable code

This patch adds the necessary code to the AMD IOMMU driver for
enabling and disabling the ATS capability on a device and to set up
the IOMMU data structures correctly.
Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f3ce4338dade..e4791f66aa38 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1452,17 +1452,22 @@ static bool dma_ops_domain(struct protection_domain *domain)
         return domain->flags & PD_DMA_OPS_MASK;
 }
 
-static void set_dte_entry(u16 devid, struct protection_domain *domain)
+static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 {
         u64 pte_root = virt_to_phys(domain->pt_root);
+        u32 flags = 0;
 
         pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
                     << DEV_ENTRY_MODE_SHIFT;
         pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
 
-        amd_iommu_dev_table[devid].data[2] = domain->id;
-        amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
-        amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+        if (ats)
+                flags |= DTE_FLAG_IOTLB;
+
+        amd_iommu_dev_table[devid].data[3] |= flags;
+        amd_iommu_dev_table[devid].data[2] = domain->id;
+        amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
+        amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
 }
 
 static void clear_dte_entry(u16 devid)
@@ -1479,16 +1484,22 @@ static void do_attach(struct device *dev, struct protection_domain *domain)
 {
         struct iommu_dev_data *dev_data;
         struct amd_iommu *iommu;
+        struct pci_dev *pdev;
+        bool ats = false;
         u16 devid;
 
         devid = get_device_id(dev);
         iommu = amd_iommu_rlookup_table[devid];
         dev_data = get_dev_data(dev);
+        pdev = to_pci_dev(dev);
+
+        if (amd_iommu_iotlb_sup)
+                ats = pci_ats_enabled(pdev);
 
         /* Update data structures */
         dev_data->domain = domain;
         list_add(&dev_data->list, &domain->dev_list);
-        set_dte_entry(devid, domain);
+        set_dte_entry(devid, domain, ats);
 
         /* Do reference counting */
         domain->dev_iommu[iommu->index] += 1;
@@ -1502,11 +1513,13 @@ static void do_detach(struct device *dev)
 {
         struct iommu_dev_data *dev_data;
         struct amd_iommu *iommu;
+        struct pci_dev *pdev;
         u16 devid;
 
         devid = get_device_id(dev);
         iommu = amd_iommu_rlookup_table[devid];
         dev_data = get_dev_data(dev);
+        pdev = to_pci_dev(dev);
 
         /* decrease reference counters */
         dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -1581,9 +1594,13 @@ out_unlock:
 static int attach_device(struct device *dev,
                          struct protection_domain *domain)
 {
+        struct pci_dev *pdev = to_pci_dev(dev);
         unsigned long flags;
         int ret;
 
+        if (amd_iommu_iotlb_sup)
+                pci_enable_ats(pdev, PAGE_SHIFT);
+
         write_lock_irqsave(&amd_iommu_devtable_lock, flags);
         ret = __attach_device(dev, domain);
         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
@@ -1640,12 +1657,16 @@ static void __detach_device(struct device *dev)
  */
 static void detach_device(struct device *dev)
 {
+        struct pci_dev *pdev = to_pci_dev(dev);
         unsigned long flags;
 
         /* lock device table */
         write_lock_irqsave(&amd_iommu_devtable_lock, flags);
         __detach_device(dev);
         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+
+        if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev))
+                pci_disable_ats(pdev);
 }
 
 /*
@@ -1795,8 +1816,9 @@ static void update_device_table(struct protection_domain *domain)
         struct iommu_dev_data *dev_data;
 
         list_for_each_entry(dev_data, &domain->dev_list, list) {
+                struct pci_dev *pdev = to_pci_dev(dev_data->dev);
                 u16 devid = get_device_id(dev_data->dev);
-                set_dte_entry(devid, domain);
+                set_dte_entry(devid, domain, pci_ats_enabled(pdev));
         }
 }
-- cgit v1.2.3
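A note on the ring-buffer arithmetic used by iommu_queue_command() in the
"Improve handling of full command buffer" patch above: the free space is
computed as (head - next_tail) % cmd_buf_size, relying on unsigned
wrap-around when head sits behind next_tail. The standalone C sketch below
works through two cases; the buffer size and the head/tail register values
are illustrative assumptions, not values taken from real hardware.

#include <stdint.h>
#include <stdio.h>

/* Assumed for illustration: a 512-byte buffer holding 32 16-byte commands. */
#define CMD_BUF_SIZE 512u
#define CMD_SIZE      16u

/* Free bytes remaining after enqueuing one command at 'tail'. */
static uint32_t left_after(uint32_t head, uint32_t tail)
{
        uint32_t next_tail = (tail + CMD_SIZE) % CMD_BUF_SIZE;

        /* Unsigned wrap-around handles head < next_tail correctly. */
        return (head - next_tail) % CMD_BUF_SIZE;
}

int main(void)
{
        /* Plenty of room: hardware consumed up to 0x000, software
         * queued up to 0x100 -> 240 bytes (15 slots) remain. */
        printf("left = %u\n", left_after(0x000, 0x100));

        /* Nearly full: the next command would make tail catch up with
         * head, so left drops to 0 (<= 2) and the driver must queue a
         * completion-wait and spin before retrying. */
        printf("left = %u\n", left_after(0x110, 0x100));

        return 0;
}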
From 58fc7f1419560efa9c426b829c195050e0147d7f Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Mon, 11 Apr 2011 11:13:24 +0200
Subject: x86/amd-iommu: Add support for invalidate_all command

This patch adds support for the invalidate_all command present in new
versions of the AMD IOMMU.

Signed-off-by: Joerg Roedel
---
 arch/x86/kernel/amd_iommu.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel/amd_iommu.c')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index bcf58ea55cfa..d6192bcf9f09 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -463,6 +463,12 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
         cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 }
 
+static void build_inv_all(struct iommu_cmd *cmd)
+{
+        memset(cmd, 0, sizeof(*cmd));
+        CMD_SET_TYPE(cmd, CMD_INV_ALL);
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command.
@@ -567,10 +573,24 @@ static void iommu_flush_tlb_all(struct amd_iommu *iommu)
         iommu_completion_wait(iommu);
 }
 
+static void iommu_flush_all(struct amd_iommu *iommu)
+{
+        struct iommu_cmd cmd;
+
+        build_inv_all(&cmd);
+
+        iommu_queue_command(iommu, &cmd);
+        iommu_completion_wait(iommu);
+}
+
 void iommu_flush_all_caches(struct amd_iommu *iommu)
 {
-        iommu_flush_dte_all(iommu);
-        iommu_flush_tlb_all(iommu);
+        if (iommu_feature(iommu, FEATURE_IA)) {
+                iommu_flush_all(iommu);
+        } else {
+                iommu_flush_dte_all(iommu);
+                iommu_flush_tlb_all(iommu);
+        }
 }
 
 /*
-- cgit v1.2.3
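Taken together, the series replaces the old MMIO-bit polling with a
semaphore handshake: the driver queues a COMPLETION_WAIT command carrying
the physical address of a 64-bit variable plus the value 1, and the CPU
then polls that variable until the IOMMU's store lands. The sketch below
models the handshake in plain userspace C, with a thread standing in for
the IOMMU; LOOP_TIMEOUT mirrors the kernel code, while pthreads and
usleep() are illustrative stand-ins for the hardware and udelay().

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define LOOP_TIMEOUT 100000

/* The "semaphore": memory that the hardware writes when the
 * COMPLETION_WAIT command retires. */
static volatile uint64_t sem;

/* Stand-in for the IOMMU: after draining earlier commands it performs
 * the store requested by the completion-wait command. */
static void *fake_iommu(void *arg)
{
        (void)arg;
        usleep(50);     /* earlier commands still draining */
        sem = 1;        /* the store normally done by hardware */
        return NULL;
}

/* Mirrors wait_on_sem() from the series (udelay -> usleep). */
static int wait_on_sem(volatile uint64_t *s)
{
        int i = 0;

        while (*s == 0 && i < LOOP_TIMEOUT) {
                usleep(1);
                i += 1;
        }

        return (i == LOOP_TIMEOUT) ? -1 : 0;
}

int main(void)
{
        pthread_t hw;

        pthread_create(&hw, NULL, fake_iommu, NULL);

        if (wait_on_sem(&sem))
                fprintf(stderr, "completion-wait timed out\n");
        else
                printf("IOMMU signalled completion\n");

        pthread_join(hw, NULL);
        return 0;
}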