From 273da5a046965ccf0ec79eb63f2d5173467e20fa Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 4 Feb 2020 14:59:25 +0100 Subject: drm/tegra: Reuse IOVA mapping where possible This partially reverts the DMA API support that was recently merged because it was causing performance regressions on older Tegra devices. Unfortunately, the cache maintenance performed by dma_map_sg() and dma_unmap_sg() causes performance to drop by a factor of 10. The right solution for this would be to cache mappings for buffers per consumer device, but that's a bit involved. Instead, we simply revert to the old behaviour of sharing IOVA mappings when we know that devices can do so (i.e. they share the same IOMMU domain). Cc: # v5.5 Reported-by: Dmitry Osipenko Signed-off-by: Thierry Reding Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko --- drivers/gpu/host1x/job.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 25ca54de8fc5..0d53c08e9972 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -101,9 +102,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { struct host1x_client *client = job->client; struct device *dev = client->dev; + struct iommu_domain *domain; unsigned int i; int err; + domain = iommu_get_domain_for_dev(dev); job->num_unpins = 0; for (i = 0; i < job->num_relocs; i++) { @@ -117,7 +120,19 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - if (client->group) + /* + * If the client device is not attached to an IOMMU, the + * physical address of the buffer object can be used. + * + * Similarly, when an IOMMU domain is shared between all + * host1x clients, the IOVA is already available, so no + * need to map the buffer object again. + * + * XXX Note that this isn't always safe to do because it + * relies on an assumption that no cache maintenance is + * needed on the buffer objects. + */ + if (!domain || client->group) phys = &phys_addr; else phys = NULL; @@ -176,6 +191,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; + dma_addr_t *phys; unsigned int j; g->bo = host1x_bo_get(g->bo); @@ -184,7 +200,17 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - sgt = host1x_bo_pin(host->dev, g->bo, NULL); + /** + * If the host1x is not attached to an IOMMU, there is no need + * to map the buffer object for the host1x, since the physical + * address can simply be used. + */ + if (!iommu_get_domain_for_dev(host->dev)) + phys = &phys_addr; + else + phys = NULL; + + sgt = host1x_bo_pin(host->dev, g->bo, phys); if (IS_ERR(sgt)) { err = PTR_ERR(sgt); goto unpin; @@ -214,7 +240,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) job->unpins[job->num_unpins].size = gather_size; phys_addr = iova_dma_addr(&host->iova, alloc); - } else { + } else if (sgt) { err = dma_map_sg(host->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); if (!err) { -- cgit v1.2.3 From 98ae41adb252866158dd4e998551dfa85e612bed Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 4 Feb 2020 14:59:26 +0100 Subject: gpu: host1x: Set DMA direction only for DMA-mapped buffer objects The DMA direction is only used by the DMA API, so there is no use in setting it when a buffer object isn't mapped with the DMA API. Signed-off-by: Thierry Reding Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko --- drivers/gpu/host1x/job.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/host1x') diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 0d53c08e9972..6c689b5cc32c 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -248,6 +248,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } + job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; job->unpins[job->num_unpins].dev = host->dev; phys_addr = sg_dma_address(sgt->sgl); } @@ -255,7 +256,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) job->addr_phys[job->num_unpins] = phys_addr; job->gather_addr_phys[i] = phys_addr; - job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; job->unpins[job->num_unpins].bo = g->bo; job->unpins[job->num_unpins].sgt = sgt; job->num_unpins++; -- cgit v1.2.3