summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/host1x
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/host1x')
-rw-r--r--drivers/gpu/host1x/bus.c35
-rw-r--r--drivers/gpu/host1x/cdma.c189
-rw-r--r--drivers/gpu/host1x/cdma.h8
-rw-r--r--drivers/gpu/host1x/dev.c49
-rw-r--r--drivers/gpu/host1x/dev.h8
-rw-r--r--drivers/gpu/host1x/hw/cdma_hw.c46
-rw-r--r--drivers/gpu/host1x/hw/channel_hw.c43
-rw-r--r--drivers/gpu/host1x/hw/host1x06_hardware.h6
-rw-r--r--drivers/gpu/host1x/hw/host1x07_hardware.h6
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x06_channel.h11
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x07_channel.h11
11 files changed, 336 insertions, 76 deletions
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index b4c385d4a6af..103fffc1904b 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -15,8 +15,10 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/debugfs.h>
#include <linux/host1x.h>
#include <linux/of.h>
+#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/of_device.h>
@@ -500,6 +502,36 @@ static void host1x_detach_driver(struct host1x *host1x,
mutex_unlock(&host1x->devices_lock);
}
+static int host1x_devices_show(struct seq_file *s, void *data)
+{
+ struct host1x *host1x = s->private;
+ struct host1x_device *device;
+
+ mutex_lock(&host1x->devices_lock);
+
+ list_for_each_entry(device, &host1x->devices, list) {
+ struct host1x_subdev *subdev;
+
+ seq_printf(s, "%s\n", dev_name(&device->dev));
+
+ mutex_lock(&device->subdevs_lock);
+
+ list_for_each_entry(subdev, &device->active, list)
+ seq_printf(s, " %pOFf: %s\n", subdev->np,
+ dev_name(subdev->client->dev));
+
+ list_for_each_entry(subdev, &device->subdevs, list)
+ seq_printf(s, " %pOFf:\n", subdev->np);
+
+ mutex_unlock(&device->subdevs_lock);
+ }
+
+ mutex_unlock(&host1x->devices_lock);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(host1x_devices);
+
/**
* host1x_register() - register a host1x controller
* @host1x: host1x controller
@@ -523,6 +555,9 @@ int host1x_register(struct host1x *host1x)
mutex_unlock(&drivers_lock);
+ debugfs_create_file("devices", S_IRUGO, host1x->debugfs, host1x,
+ &host1x_devices_fops);
+
return 0;
}
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 91df51e631b2..f45b7c69b694 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -41,7 +41,17 @@
* means that the push buffer is full, not empty.
*/
-#define HOST1X_PUSHBUFFER_SLOTS 512
+/*
+ * Typically the commands written into the push buffer are a pair of words. We
+ * use slots to represent each of these pairs and to simplify things. Note the
+ * strange number of slots allocated here. 512 slots will fit exactly within a
+ * single memory page. We also need one additional word at the end of the push
+ * buffer for the RESTART opcode that will instruct the CDMA to jump back to
+ * the beginning of the push buffer. With 512 slots, this means that we'll use
+ * 2 memory pages and waste 4092 bytes of the second page that will never be
+ * used.
+ */
+#define HOST1X_PUSHBUFFER_SLOTS 511
/*
* Clean up push buffer resources
@@ -143,7 +153,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
WARN_ON(pb->pos == pb->fence);
*(p++) = op1;
*(p++) = op2;
- pb->pos = (pb->pos + 8) & (pb->size - 1);
+ pb->pos += 8;
+
+ if (pb->pos >= pb->size)
+ pb->pos -= pb->size;
}
/*
@@ -153,7 +166,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
{
/* Advance the next write position */
- pb->fence = (pb->fence + slots * 8) & (pb->size - 1);
+ pb->fence += slots * 8;
+
+ if (pb->fence >= pb->size)
+ pb->fence -= pb->size;
}
/*
@@ -161,7 +177,12 @@ static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
*/
static u32 host1x_pushbuffer_space(struct push_buffer *pb)
{
- return ((pb->fence - pb->pos) & (pb->size - 1)) / 8;
+ unsigned int fence = pb->fence;
+
+ if (pb->fence < pb->pos)
+ fence += pb->size;
+
+ return (fence - pb->pos) / 8;
}
/*
@@ -210,7 +231,7 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
cdma->event = event;
mutex_unlock(&cdma->lock);
- down(&cdma->sem);
+ wait_for_completion(&cdma->complete);
mutex_lock(&cdma->lock);
}
@@ -218,6 +239,45 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
}
/*
+ * Sleep (if necessary) until the push buffer has enough free space.
+ *
+ * Must be called with the cdma lock held.
+ */
+int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
+ struct host1x_cdma *cdma,
+ unsigned int needed)
+{
+ while (true) {
+ struct push_buffer *pb = &cdma->push_buffer;
+ unsigned int space;
+
+ space = host1x_pushbuffer_space(pb);
+ if (space >= needed)
+ break;
+
+ trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
+ CDMA_EVENT_PUSH_BUFFER_SPACE);
+
+ host1x_hw_cdma_flush(host1x, cdma);
+
+ /* If somebody has managed to already start waiting, yield */
+ if (cdma->event != CDMA_EVENT_NONE) {
+ mutex_unlock(&cdma->lock);
+ schedule();
+ mutex_lock(&cdma->lock);
+ continue;
+ }
+
+ cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE;
+
+ mutex_unlock(&cdma->lock);
+ wait_for_completion(&cdma->complete);
+ mutex_lock(&cdma->lock);
+ }
+
+ return 0;
+}
+/*
* Start timer that tracks the time spent by the job.
* Must be called with the cdma lock held.
*/
@@ -314,7 +374,7 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
if (signal) {
cdma->event = CDMA_EVENT_NONE;
- up(&cdma->sem);
+ complete(&cdma->complete);
}
}
@@ -323,7 +383,7 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
{
struct host1x *host1x = cdma_to_host1x(cdma);
u32 restart_addr, syncpt_incrs, syncpt_val;
- struct host1x_job *job = NULL;
+ struct host1x_job *job, *next_job = NULL;
syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
@@ -341,40 +401,37 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
__func__);
list_for_each_entry(job, &cdma->sync_queue, list) {
- if (syncpt_val < job->syncpt_end)
- break;
+ if (syncpt_val < job->syncpt_end) {
+
+ if (!list_is_last(&job->list, &cdma->sync_queue))
+ next_job = list_next_entry(job, list);
+
+ goto syncpt_incr;
+ }
host1x_job_dump(dev, job);
}
+ /* all jobs have been completed */
+ job = NULL;
+
+syncpt_incr:
+
/*
- * Walk the sync_queue, first incrementing with the CPU syncpts that
- * are partially executed (the first buffer) or fully skipped while
- * still in the current context (slots are also NOP-ed).
+ * Increment with CPU the remaining syncpts of a partially executed job.
*
- * At the point contexts are interleaved, syncpt increments must be
- * done inline with the pushbuffer from a GATHER buffer to maintain
- * the order (slots are modified to be a GATHER of syncpt incrs).
- *
- * Note: save in restart_addr the location where the timed out buffer
- * started in the PB, so we can start the refetch from there (with the
- * modified NOP-ed PB slots). This lets things appear to have completed
- * properly for this buffer and resources are freed.
+ * CDMA will continue execution starting with the next job or will get
+ * into idle state.
*/
-
- dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n",
- __func__);
-
- if (!list_empty(&cdma->sync_queue))
- restart_addr = job->first_get;
+ if (next_job)
+ restart_addr = next_job->first_get;
else
restart_addr = cdma->last_pos;
- /* do CPU increments as long as this context continues */
- list_for_each_entry_from(job, &cdma->sync_queue, list) {
- /* different context, gets us out of this loop */
- if (job->client != cdma->timeout.client)
- break;
+ /* do CPU increments for the remaining syncpts */
+ if (job) {
+ dev_dbg(dev, "%s: perform CPU incr on pending buffers\n",
+ __func__);
/* won't need a timeout when replayed */
job->timeout = 0;
@@ -389,21 +446,10 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
syncpt_incrs, job->syncpt_end,
job->num_slots);
- syncpt_val += syncpt_incrs;
+ dev_dbg(dev, "%s: finished sync_queue modification\n",
+ __func__);
}
- /*
- * The following sumbits from the same client may be dependent on the
- * failed submit and therefore they may fail. Force a small timeout
- * to make the queue cleanup faster.
- */
-
- list_for_each_entry_from(job, &cdma->sync_queue, list)
- if (job->client == cdma->timeout.client)
- job->timeout = min_t(unsigned int, job->timeout, 500);
-
- dev_dbg(dev, "%s: finished sync_queue modification\n", __func__);
-
/* roll back DMAGET and start up channel again */
host1x_hw_cdma_resume(host1x, cdma, restart_addr);
}
@@ -416,7 +462,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
int err;
mutex_init(&cdma->lock);
- sema_init(&cdma->sem, 0);
+ init_completion(&cdma->complete);
INIT_LIST_HEAD(&cdma->sync_queue);
@@ -510,6 +556,59 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
}
/*
+ * Push four words into two consecutive push buffer slots. Note that extra
+ * care needs to be taken not to split the two slots across the end of the
+ * push buffer. Otherwise the RESTART opcode at the end of the push buffer
+ * that ensures processing will restart at the beginning will break up the
+ * four words.
+ *
+ * Blocks as necessary if the push buffer is full.
+ */
+void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
+ u32 op3, u32 op4)
+{
+ struct host1x_channel *channel = cdma_to_channel(cdma);
+ struct host1x *host1x = cdma_to_host1x(cdma);
+ struct push_buffer *pb = &cdma->push_buffer;
+ unsigned int needed = 2, extra = 0, i;
+ unsigned int space = cdma->slots_free;
+
+ if (host1x_debug_trace_cmdbuf)
+ trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2,
+ op3, op4);
+
+ /* compute number of extra slots needed for padding */
+ if (pb->pos + 16 > pb->size) {
+ extra = (pb->size - pb->pos) / 8;
+ needed += extra;
+ }
+
+ host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed);
+ space = host1x_pushbuffer_space(pb);
+
+ cdma->slots_free = space - needed;
+ cdma->slots_used += needed;
+
+ /*
+ * Note that we rely on the fact that this is only used to submit wide
+ * gather opcodes, which consist of 3 words, and they are padded with
+ * a NOP to avoid having to deal with fractional slots (a slot always
+ * represents 2 words). The fourth opcode passed to this function will
+ * therefore always be a NOP.
+ *
+ * This works around a slight ambiguity when it comes to opcodes. For
+ * all current host1x incarnations the NOP opcode uses the exact same
+ * encoding (0x20000000), so we could hard-code the value here, but a
+ * new incarnation may change it and break that assumption.
+ */
+ for (i = 0; i < extra; i++)
+ host1x_pushbuffer_push(pb, op4, op4);
+
+ host1x_pushbuffer_push(pb, op1, op2);
+ host1x_pushbuffer_push(pb, op3, op4);
+}
+
+/*
* End a cdma submit
* Kick off DMA, add job to the sync queue, and a number of slots to be freed
* from the pushbuffer. The handles for a submit must all be pinned at the same
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index e97e17b82370..3a5e0408b8d1 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -20,7 +20,7 @@
#define __HOST1X_CDMA_H
#include <linux/sched.h>
-#include <linux/semaphore.h>
+#include <linux/completion.h>
#include <linux/list.h>
struct host1x_syncpt;
@@ -69,8 +69,8 @@ enum cdma_event {
struct host1x_cdma {
struct mutex lock; /* controls access to shared state */
- struct semaphore sem; /* signalled when event occurs */
- enum cdma_event event; /* event that sem is waiting for */
+ struct completion complete; /* signalled when event occurs */
+ enum cdma_event event; /* event that complete is waiting for */
unsigned int slots_used; /* pb slots used in current submit */
unsigned int slots_free; /* pb slots free in current submit */
unsigned int first_get; /* DMAGET value, where submit begins */
@@ -90,6 +90,8 @@ int host1x_cdma_init(struct host1x_cdma *cdma);
int host1x_cdma_deinit(struct host1x_cdma *cdma);
int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job);
void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2);
+void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
+ u32 op3, u32 op4);
void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job);
void host1x_cdma_update(struct host1x_cdma *cdma);
void host1x_cdma_peek(struct host1x_cdma *cdma, u32 dmaget, int slot,
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 419d8929a98f..ee3c7b81a29d 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -120,6 +120,15 @@ static const struct host1x_info host1x05_info = {
.dma_mask = DMA_BIT_MASK(34),
};
+static const struct host1x_sid_entry tegra186_sid_table[] = {
+ {
+ /* VIC */
+ .base = 0x1af0,
+ .offset = 0x30,
+ .limit = 0x34
+ },
+};
+
static const struct host1x_info host1x06_info = {
.nb_channels = 63,
.nb_pts = 576,
@@ -127,8 +136,19 @@ static const struct host1x_info host1x06_info = {
.nb_bases = 16,
.init = host1x06_init,
.sync_offset = 0x0,
- .dma_mask = DMA_BIT_MASK(34),
+ .dma_mask = DMA_BIT_MASK(40),
.has_hypervisor = true,
+ .num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
+ .sid_table = tegra186_sid_table,
+};
+
+static const struct host1x_sid_entry tegra194_sid_table[] = {
+ {
+ /* VIC */
+ .base = 0x1af0,
+ .offset = 0x30,
+ .limit = 0x34
+ },
};
static const struct host1x_info host1x07_info = {
@@ -140,6 +160,8 @@ static const struct host1x_info host1x07_info = {
.sync_offset = 0x0,
.dma_mask = DMA_BIT_MASK(40),
.has_hypervisor = true,
+ .num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
+ .sid_table = tegra194_sid_table,
};
static const struct of_device_id host1x_of_match[] = {
@@ -154,6 +176,19 @@ static const struct of_device_id host1x_of_match[] = {
};
MODULE_DEVICE_TABLE(of, host1x_of_match);
+static void host1x_setup_sid_table(struct host1x *host)
+{
+ const struct host1x_info *info = host->info;
+ unsigned int i;
+
+ for (i = 0; i < info->num_sid_entries; i++) {
+ const struct host1x_sid_entry *entry = &info->sid_table[i];
+
+ host1x_hypervisor_writel(host, entry->offset, entry->base);
+ host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
+ }
+}
+
static int host1x_probe(struct platform_device *pdev)
{
struct host1x *host;
@@ -248,6 +283,8 @@ static int host1x_probe(struct platform_device *pdev)
host->group = iommu_group_get(&pdev->dev);
if (host->group) {
struct iommu_domain_geometry *geometry;
+ u64 mask = dma_get_mask(host->dev);
+ dma_addr_t start, end;
unsigned long order;
err = iova_cache_get();
@@ -275,11 +312,12 @@ static int host1x_probe(struct platform_device *pdev)
}
geometry = &host->domain->geometry;
+ start = geometry->aperture_start & mask;
+ end = geometry->aperture_end & mask;
order = __ffs(host->domain->pgsize_bitmap);
- init_iova_domain(&host->iova, 1UL << order,
- geometry->aperture_start >> order);
- host->iova_end = geometry->aperture_end;
+ init_iova_domain(&host->iova, 1UL << order, start >> order);
+ host->iova_end = end;
}
skip_iommu:
@@ -316,6 +354,9 @@ skip_iommu:
host1x_debug_init(host);
+ if (host->info->has_hypervisor)
+ host1x_setup_sid_table(host);
+
err = host1x_register(host);
if (err < 0)
goto fail_deinit_intr;
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 36f44ffebe73..05216a7e4830 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -94,6 +94,12 @@ struct host1x_intr_ops {
int (*free_syncpt_irq)(struct host1x *host);
};
+struct host1x_sid_entry {
+ unsigned int base;
+ unsigned int offset;
+ unsigned int limit;
+};
+
struct host1x_info {
unsigned int nb_channels; /* host1x: number of channels supported */
unsigned int nb_pts; /* host1x: number of syncpoints supported */
@@ -103,6 +109,8 @@ struct host1x_info {
unsigned int sync_offset; /* offset of syncpoint registers */
u64 dma_mask; /* mask of addressable memory */
bool has_hypervisor; /* has hypervisor registers */
+ unsigned int num_sid_entries;
+ const struct host1x_sid_entry *sid_table;
};
struct host1x {
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index ce320534cbed..5d61088db2bb 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -39,8 +39,6 @@ static void push_buffer_init(struct push_buffer *pb)
static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
u32 syncpt_incrs, u32 syncval, u32 nr_slots)
{
- struct host1x *host1x = cdma_to_host1x(cdma);
- struct push_buffer *pb = &cdma->push_buffer;
unsigned int i;
for (i = 0; i < syncpt_incrs; i++)
@@ -48,18 +46,6 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
/* after CPU incr, ensure shadow is up to date */
host1x_syncpt_load(cdma->timeout.syncpt);
-
- /* NOP all the PB slots */
- while (nr_slots--) {
- u32 *p = (u32 *)(pb->mapped + getptr);
- *(p++) = HOST1X_OPCODE_NOP;
- *(p++) = HOST1X_OPCODE_NOP;
- dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
- &pb->dma, getptr);
- getptr = (getptr + 8) & (pb->size - 1);
- }
-
- wmb();
}
/*
@@ -68,20 +54,31 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
static void cdma_start(struct host1x_cdma *cdma)
{
struct host1x_channel *ch = cdma_to_channel(cdma);
+ u64 start, end;
if (cdma->running)
return;
cdma->last_pos = cdma->push_buffer.pos;
+ start = cdma->push_buffer.dma;
+ end = cdma->push_buffer.size + 4;
host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
HOST1X_CHANNEL_DMACTRL);
/* set base, put and end pointer */
- host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
+ host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART);
+#if HOST1X_HW >= 6
+ host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI);
+#endif
host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
- host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size + 4,
- HOST1X_CHANNEL_DMAEND);
+#if HOST1X_HW >= 6
+ host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMAPUT_HI);
+#endif
+ host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND);
+#if HOST1X_HW >= 6
+ host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI);
+#endif
/* reset GET */
host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
@@ -104,6 +101,7 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
{
struct host1x *host1x = cdma_to_host1x(cdma);
struct host1x_channel *ch = cdma_to_channel(cdma);
+ u64 start, end;
if (cdma->running)
return;
@@ -113,10 +111,18 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
HOST1X_CHANNEL_DMACTRL);
+ start = cdma->push_buffer.dma;
+ end = cdma->push_buffer.size + 4;
+
/* set base, end pointer (all of memory) */
- host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART);
- host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size,
- HOST1X_CHANNEL_DMAEND);
+ host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART);
+#if HOST1X_HW >= 6
+ host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI);
+#endif
+ host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND);
+#if HOST1X_HW >= 6
+ host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI);
+#endif
/* set GET, by loading the value in PUT (then reset GET) */
host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT);
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 95ea81172a83..27101c04a827 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -17,6 +17,7 @@
*/
#include <linux/host1x.h>
+#include <linux/iommu.h>
#include <linux/slab.h>
#include <trace/events/host1x.h>
@@ -60,15 +61,37 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
static void submit_gathers(struct host1x_job *job)
{
struct host1x_cdma *cdma = &job->channel->cdma;
+#if HOST1X_HW < 6
+ struct device *dev = job->channel->dev;
+#endif
unsigned int i;
for (i = 0; i < job->num_gathers; i++) {
struct host1x_job_gather *g = &job->gathers[i];
- u32 op1 = host1x_opcode_gather(g->words);
- u32 op2 = g->base + g->offset;
+ dma_addr_t addr = g->base + g->offset;
+ u32 op2, op3;
+
+ op2 = lower_32_bits(addr);
+ op3 = upper_32_bits(addr);
+
+ trace_write_gather(cdma, g->bo, g->offset, g->words);
+
+ if (op3 != 0) {
+#if HOST1X_HW >= 6
+ u32 op1 = host1x_opcode_gather_wide(g->words);
+ u32 op4 = HOST1X_OPCODE_NOP;
+
+ host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
+#else
+ dev_err(dev, "invalid gather for push buffer %pad\n",
+ &addr);
+ continue;
+#endif
+ } else {
+ u32 op1 = host1x_opcode_gather(g->words);
- trace_write_gather(cdma, g->bo, g->offset, op1 & 0xffff);
- host1x_cdma_push(cdma, op1, op2);
+ host1x_cdma_push(cdma, op1, op2);
+ }
}
}
@@ -89,6 +112,16 @@ static inline void synchronize_syncpt_base(struct host1x_job *job)
HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value));
}
+static void host1x_channel_set_streamid(struct host1x_channel *channel)
+{
+#if HOST1X_HW >= 6
+ struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent);
+ u32 sid = spec ? spec->ids[0] & 0xffff : 0x7f;
+
+ host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID);
+#endif
+}
+
static int channel_submit(struct host1x_job *job)
{
struct host1x_channel *ch = job->channel;
@@ -120,6 +153,8 @@ static int channel_submit(struct host1x_job *job)
goto error;
}
+ host1x_channel_set_streamid(ch);
+
/* begin a CDMA submit */
err = host1x_cdma_begin(&ch->cdma, job);
if (err) {
diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h
index 3039c92ea605..dd37b10c8d04 100644
--- a/drivers/gpu/host1x/hw/host1x06_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x06_hardware.h
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <linux/bitops.h>
+#include "hw_host1x06_channel.h"
#include "hw_host1x06_uclass.h"
#include "hw_host1x06_vm.h"
#include "hw_host1x06_hypervisor.h"
@@ -137,6 +138,11 @@ static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
}
+static inline u32 host1x_opcode_gather_wide(unsigned count)
+{
+ return (12 << 28) | count;
+}
+
#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
#endif
diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h
index 1353e7ab71dd..9f6da4ee5443 100644
--- a/drivers/gpu/host1x/hw/host1x07_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x07_hardware.h
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <linux/bitops.h>
+#include "hw_host1x07_channel.h"
#include "hw_host1x07_uclass.h"
#include "hw_host1x07_vm.h"
#include "hw_host1x07_hypervisor.h"
@@ -137,6 +138,11 @@ static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
}
+static inline u32 host1x_opcode_gather_wide(unsigned count)
+{
+ return (12 << 28) | count;
+}
+
#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_channel.h b/drivers/gpu/host1x/hw/hw_host1x06_channel.h
new file mode 100644
index 000000000000..18ae1c57bbea
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_channel.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X06_CHANNEL_H
+#define HOST1X_HW_HOST1X06_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_channel.h b/drivers/gpu/host1x/hw/hw_host1x07_channel.h
new file mode 100644
index 000000000000..96fa72bbd7ab
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x07_channel.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X07_CHANNEL_H
+#define HOST1X_HW_HOST1X07_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif