summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/host1x
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-01 11:26:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-01 11:26:46 -0700
commit477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6 (patch)
tree1897dd1de49e1ea24897163533e2d8ead5dad0ad /drivers/gpu/host1x
parent835d31d319d9c8c4eb6cac074643360ba0ecab10 (diff)
parent8f0284f190e6a0aa09015090568c03f18288231a (diff)
downloadlinux-stable-477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6.tar.gz
linux-stable-477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6.tar.bz2
linux-stable-477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6.zip
Merge tag 'drm-next-2021-08-31-1' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Highlights: - i915 has seen a lot of refactoring and uAPI cleanups due to a change in the upstream direction going forward This has all been audited with known userspace, but there may be some pitfalls that were missed. - i915 now uses common TTM to enable discrete memory on DG1/2 GPUs - i915 enables Jasper and Elkhart Lake by default and has preliminary XeHP/DG2 support - amdgpu adds support for Cyan Skillfish - lots of implicit fencing rules documented and fixed up in drivers - msm now uses the core scheduler - the irq midlayer has been removed for non-legacy drivers - the sysfb code now works on more than x86. Otherwise the usual smattering of stuff everywhere, panels, bridges, refactorings. Detailed summary: core: - extract i915 eDP backlight into core - DP aux bus support - drm_device.irq_enabled removed - port drivers to native irq interfaces - export gem shadow plane handling for vgem - print proper driver name in framebuffer registration - driver fixes for implicit fencing rules - ARM fixed rate compression modifier added - updated fb damage handling - rmfb ioctl logging/docs - drop drm_gem_object_put_locked - define DRM_FORMAT_MAX_PLANES - add gem fb vmap/vunmap helpers - add lockdep_assert(once) helpers - mark drm irq midlayer as legacy - use offset adjusted bo mapping conversion vgaarb: - cleanups fbdev: - extend efifb handling to all arches - div by 0 fixes for multiple drivers udmabuf: - add hugepage mapping support dma-buf: - non-dynamic exporter fixups - document implicit fencing rules amdgpu: - Initial Cyan Skillfish support - switch virtual DCE over to vkms based atomic - VCN/JPEG power down fixes - NAVI PCIE link handling fixes - AMD HDMI freesync fixes - Yellow Carp + Beige Goby fixes - Clockgating/S0ix/SMU/EEPROM fixes - embed hw fence in job - rework dma-resv handling - ensure eviction to system ram amdkfd: - uapi: SVM address range query added - sysfs leak fix - GPUVM TLB optimizations - vmfault/migration counters i915: - Enable JSL and EHL by default - preliminary XeHP/DG2 support - remove all CNL support (never shipped) - move to TTM for discrete memory support - allow mixed object mmap handling - GEM uAPI spring cleaning - add I915_MMAP_OBJECT_FIXED - reinstate ADL-P mmap ioctls - drop a bunch of unused by userspace features - disable and remove GPU relocations - revert some i915 misfeatures - major refactoring of GuC for Gen11+ - execbuffer object locking separate step - reject caching/set-domain on discrete - Enable pipe DMC loading on XE-LPD and ADL-P - add PSF GV point support - Refactor and fix DDI buffer translations - Clean up FBC CFB allocation code - Finish INTEL_GEN() and friends macro conversions nouveau: - add eDP backlight support - implicit fence fix msm: - a680/7c3 support - drm/scheduler conversion panfrost: - rework GPU reset virtio: - fix fencing for planes ast: - add detect support bochs: - move to tiny GPU driver vc4: - use hotplug irqs - HDMI codec support vmwgfx: - use internal vmware device headers ingenic: - demidlayering irq rcar-du: - shutdown fixes - convert to bridge connector helpers zynqmp-dsub: - misc fixes mgag200: - convert PLL handling to atomic mediatek: - MT8133 AAL support - gem mmap object support - MT8167 support etnaviv: - NXP Layerscape LS1028A SoC support - GEM mmap cleanups tegra: - new user API exynos: - missing unlock fix - build warning fix - use refcount_t" * tag 'drm-next-2021-08-31-1' of git://anongit.freedesktop.org/drm/drm: (1318 commits) drm/amd/display: Move AllowDRAMSelfRefreshOrDRAMClockChangeInVblank to bounding box drm/amd/display: Remove duplicate dml init drm/amd/display: Update bounding box states (v2) drm/amd/display: Update number of DCN3 clock states drm/amdgpu: disable GFX CGCG in aldebaran drm/amdgpu: Clear RAS interrupt status on aldebaran drm/amdgpu: Add support for RAS XGMI err query drm/amdkfd: Account for SH/SE count when setting up cu masks. drm/amdgpu: rename amdgpu_bo_get_preferred_pin_domain drm/amdgpu: drop redundant cancel_delayed_work_sync call drm/amdgpu: add missing cleanups for more ASICs on UVD/VCE suspend drm/amdgpu: add missing cleanups for Polaris12 UVD/VCE on suspend drm/amdkfd: map SVM range with correct access permission drm/amdkfd: check access permisson to restore retry fault drm/amdgpu: Update RAS XGMI Error Query drm/amdgpu: Add driver infrastructure for MCA RAS drm/amd/display: Add Logging for HDMI color depth information drm/amd/amdgpu: consolidate PSP TA init shared buf functions drm/amd/amdgpu: add name field back to ras_common_if drm/amdgpu: Fix build with missing pm_suspend_target_state module export ...
Diffstat (limited to 'drivers/gpu/host1x')
-rw-r--r--drivers/gpu/host1x/Makefile1
-rw-r--r--drivers/gpu/host1x/cdma.c58
-rw-r--r--drivers/gpu/host1x/fence.c168
-rw-r--r--drivers/gpu/host1x/fence.h13
-rw-r--r--drivers/gpu/host1x/hw/channel_hw.c87
-rw-r--r--drivers/gpu/host1x/hw/debug_hw.c32
-rw-r--r--drivers/gpu/host1x/hw/debug_hw_1x01.c8
-rw-r--r--drivers/gpu/host1x/hw/debug_hw_1x06.c16
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x02_uclass.h12
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x04_uclass.h12
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x05_uclass.h12
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x06_uclass.h12
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x07_uclass.h12
-rw-r--r--drivers/gpu/host1x/intr.c9
-rw-r--r--drivers/gpu/host1x/intr.h2
-rw-r--r--drivers/gpu/host1x/job.c98
-rw-r--r--drivers/gpu/host1x/job.h16
-rw-r--r--drivers/gpu/host1x/syncpt.c2
-rw-r--r--drivers/gpu/host1x/syncpt.h12
19 files changed, 510 insertions, 72 deletions
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 096017b8789d..d2b6f7de0498 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -9,6 +9,7 @@ host1x-y = \
job.o \
debug.o \
mipi.o \
+ fence.o \
hw/host1x01.o \
hw/host1x02.o \
hw/host1x04.o \
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 6e6ca774f68d..765e5aa64eb6 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -312,10 +312,6 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
bool signal = false;
struct host1x_job *job, *n;
- /* If CDMA is stopped, queue is cleared and we can return */
- if (!cdma->running)
- return;
-
/*
* Walk the sync queue, reading the sync point registers as necessary,
* to consume as many sync queue entries as possible without blocking
@@ -324,7 +320,8 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
struct host1x_syncpt *sp = job->syncpt;
/* Check whether this syncpt has completed, and bail if not */
- if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) {
+ if (!host1x_syncpt_is_expired(sp, job->syncpt_end) &&
+ !job->cancelled) {
/* Start timer on next pending syncpt */
if (job->timeout)
cdma_start_timer_locked(cdma, job);
@@ -413,8 +410,11 @@ syncpt_incr:
else
restart_addr = cdma->last_pos;
+ if (!job)
+ goto resume;
+
/* do CPU increments for the remaining syncpts */
- if (job) {
+ if (job->syncpt_recovery) {
dev_dbg(dev, "%s: perform CPU incr on pending buffers\n",
__func__);
@@ -433,8 +433,44 @@ syncpt_incr:
dev_dbg(dev, "%s: finished sync_queue modification\n",
__func__);
+ } else {
+ struct host1x_job *failed_job = job;
+
+ host1x_job_dump(dev, job);
+
+ host1x_syncpt_set_locked(job->syncpt);
+ failed_job->cancelled = true;
+
+ list_for_each_entry_continue(job, &cdma->sync_queue, list) {
+ unsigned int i;
+
+ if (job->syncpt != failed_job->syncpt)
+ continue;
+
+ for (i = 0; i < job->num_slots; i++) {
+ unsigned int slot = (job->first_get/8 + i) %
+ HOST1X_PUSHBUFFER_SLOTS;
+ u32 *mapped = cdma->push_buffer.mapped;
+
+ /*
+ * Overwrite opcodes with 0 word writes
+ * to offset 0xbad. This does nothing but
+ * has a easily detected signature in debug
+ * traces.
+ */
+ mapped[2*slot+0] = 0x1bad0000;
+ mapped[2*slot+1] = 0x1bad0000;
+ }
+
+ job->cancelled = true;
+ }
+
+ wmb();
+
+ update_cdma_locked(cdma);
}
+resume:
/* roll back DMAGET and start up channel again */
host1x_hw_cdma_resume(host1x, cdma, restart_addr);
}
@@ -490,6 +526,16 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
mutex_lock(&cdma->lock);
+ /*
+ * Check if syncpoint was locked due to previous job timeout.
+ * This needs to be done within the cdma lock to avoid a race
+ * with the timeout handler.
+ */
+ if (job->syncpt->locked) {
+ mutex_unlock(&cdma->lock);
+ return -EPERM;
+ }
+
if (job->timeout) {
/* init state on first submit with timeout value */
if (!cdma->timeout.initialized) {
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
new file mode 100644
index 000000000000..6941add95d0f
--- /dev/null
+++ b/drivers/gpu/host1x/fence.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Syncpoint dma_fence implementation
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/sync_file.h>
+
+#include "fence.h"
+#include "intr.h"
+#include "syncpt.h"
+
+DEFINE_SPINLOCK(lock);
+
+struct host1x_syncpt_fence {
+ struct dma_fence base;
+
+ atomic_t signaling;
+
+ struct host1x_syncpt *sp;
+ u32 threshold;
+
+ struct host1x_waitlist *waiter;
+ void *waiter_ref;
+
+ struct delayed_work timeout_work;
+};
+
+static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f)
+{
+ return "host1x";
+}
+
+static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f)
+{
+ return "syncpoint";
+}
+
+static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f)
+{
+ return container_of(f, struct host1x_syncpt_fence, base);
+}
+
+static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
+{
+ struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+ int err;
+
+ if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
+ return false;
+
+ dma_fence_get(f);
+
+ /*
+ * The dma_fence framework requires the fence driver to keep a
+ * reference to any fences for which 'enable_signaling' has been
+ * called (and that have not been signalled).
+ *
+ * We provide a userspace API to create arbitrary syncpoint fences,
+ * so we cannot normally guarantee that all fences get signalled.
+ * As such, setup a timeout, so that long-lasting fences will get
+ * reaped eventually.
+ */
+ schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
+
+ err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold,
+ HOST1X_INTR_ACTION_SIGNAL_FENCE, f,
+ sf->waiter, &sf->waiter_ref);
+ if (err) {
+ cancel_delayed_work_sync(&sf->timeout_work);
+ dma_fence_put(f);
+ return false;
+ }
+
+ /* intr framework takes ownership of waiter */
+ sf->waiter = NULL;
+
+ /*
+ * The fence may get signalled at any time after the above call,
+ * so we need to initialize all state used by signalling
+ * before it.
+ */
+
+ return true;
+}
+
+static void host1x_syncpt_fence_release(struct dma_fence *f)
+{
+ struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+
+ if (sf->waiter)
+ kfree(sf->waiter);
+
+ dma_fence_free(f);
+}
+
+const struct dma_fence_ops host1x_syncpt_fence_ops = {
+ .get_driver_name = host1x_syncpt_fence_get_driver_name,
+ .get_timeline_name = host1x_syncpt_fence_get_timeline_name,
+ .enable_signaling = host1x_syncpt_fence_enable_signaling,
+ .release = host1x_syncpt_fence_release,
+};
+
+void host1x_fence_signal(struct host1x_syncpt_fence *f)
+{
+ if (atomic_xchg(&f->signaling, 1))
+ return;
+
+ /*
+ * Cancel pending timeout work - if it races, it will
+ * not get 'f->signaling' and return.
+ */
+ cancel_delayed_work_sync(&f->timeout_work);
+
+ host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, false);
+
+ dma_fence_signal(&f->base);
+ dma_fence_put(&f->base);
+}
+
+static void do_fence_timeout(struct work_struct *work)
+{
+ struct delayed_work *dwork = (struct delayed_work *)work;
+ struct host1x_syncpt_fence *f =
+ container_of(dwork, struct host1x_syncpt_fence, timeout_work);
+
+ if (atomic_xchg(&f->signaling, 1))
+ return;
+
+ /*
+ * Cancel pending timeout work - if it races, it will
+ * not get 'f->signaling' and return.
+ */
+ host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, true);
+
+ dma_fence_set_error(&f->base, -ETIMEDOUT);
+ dma_fence_signal(&f->base);
+ dma_fence_put(&f->base);
+}
+
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
+{
+ struct host1x_syncpt_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return ERR_PTR(-ENOMEM);
+
+ fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL);
+ if (!fence->waiter)
+ return ERR_PTR(-ENOMEM);
+
+ fence->sp = sp;
+ fence->threshold = threshold;
+
+ dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &lock,
+ dma_fence_context_alloc(1), 0);
+
+ INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout);
+
+ return &fence->base;
+}
+EXPORT_SYMBOL(host1x_fence_create);
diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h
new file mode 100644
index 000000000000..70c91de82f14
--- /dev/null
+++ b/drivers/gpu/host1x/fence.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_FENCE_H
+#define HOST1X_FENCE_H
+
+struct host1x_syncpt_fence;
+
+void host1x_fence_signal(struct host1x_syncpt_fence *fence);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index d4c28faf27d1..1999780a7203 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -47,39 +47,84 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
}
}
-static void submit_gathers(struct host1x_job *job)
+static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
+ u32 next_class)
+{
+#if HOST1X_HW >= 2
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_setclass(
+ HOST1X_CLASS_HOST1X,
+ HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
+ /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
+ BIT(0) | BIT(2)
+ ),
+ threshold,
+ id,
+ host1x_opcode_setclass(next_class, 0, 0)
+ );
+#else
+ /* TODO add waitchk or use waitbases or other mitigation */
+ host1x_cdma_push(cdma,
+ host1x_opcode_setclass(
+ HOST1X_CLASS_HOST1X,
+ host1x_uclass_wait_syncpt_r(),
+ BIT(0)
+ ),
+ host1x_class_host_wait_syncpt(id, threshold)
+ );
+ host1x_cdma_push(cdma,
+ host1x_opcode_setclass(next_class, 0, 0),
+ HOST1X_OPCODE_NOP
+ );
+#endif
+}
+
+static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
{
struct host1x_cdma *cdma = &job->channel->cdma;
#if HOST1X_HW < 6
struct device *dev = job->channel->dev;
#endif
unsigned int i;
+ u32 threshold;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
- dma_addr_t addr = g->base + g->offset;
- u32 op2, op3;
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_cmd *cmd = &job->cmds[i];
- op2 = lower_32_bits(addr);
- op3 = upper_32_bits(addr);
+ if (cmd->is_wait) {
+ if (cmd->wait.relative)
+ threshold = job_syncpt_base + cmd->wait.threshold;
+ else
+ threshold = cmd->wait.threshold;
- trace_write_gather(cdma, g->bo, g->offset, g->words);
+ submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class);
+ } else {
+ struct host1x_job_gather *g = &cmd->gather;
+
+ dma_addr_t addr = g->base + g->offset;
+ u32 op2, op3;
+
+ op2 = lower_32_bits(addr);
+ op3 = upper_32_bits(addr);
- if (op3 != 0) {
+ trace_write_gather(cdma, g->bo, g->offset, g->words);
+
+ if (op3 != 0) {
#if HOST1X_HW >= 6
- u32 op1 = host1x_opcode_gather_wide(g->words);
- u32 op4 = HOST1X_OPCODE_NOP;
+ u32 op1 = host1x_opcode_gather_wide(g->words);
+ u32 op4 = HOST1X_OPCODE_NOP;
- host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
+ host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
#else
- dev_err(dev, "invalid gather for push buffer %pad\n",
- &addr);
- continue;
+ dev_err(dev, "invalid gather for push buffer %pad\n",
+ &addr);
+ continue;
#endif
- } else {
- u32 op1 = host1x_opcode_gather(g->words);
+ } else {
+ u32 op1 = host1x_opcode_gather(g->words);
- host1x_cdma_push(cdma, op1, op2);
+ host1x_cdma_push(cdma, op1, op2);
+ }
}
}
}
@@ -126,7 +171,7 @@ static int channel_submit(struct host1x_job *job)
struct host1x *host = dev_get_drvdata(ch->dev->parent);
trace_host1x_channel_submit(dev_name(ch->dev),
- job->num_gathers, job->num_relocs,
+ job->num_cmds, job->num_relocs,
job->syncpt->id, job->syncpt_incrs);
/* before error checks, return current max */
@@ -181,7 +226,7 @@ static int channel_submit(struct host1x_job *job)
host1x_opcode_setclass(job->class, 0, 0),
HOST1X_OPCODE_NOP);
- submit_gathers(job);
+ submit_gathers(job, syncval - user_syncpt_incrs);
/* end CDMA submit & stash pinned hMems into sync queue */
host1x_cdma_end(&ch->cdma, job);
@@ -191,7 +236,7 @@ static int channel_submit(struct host1x_job *job)
/* schedule a submit complete interrupt */
err = host1x_intr_add_action(host, sp, syncval,
HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
- completed_waiter, NULL);
+ completed_waiter, &job->waiter);
completed_waiter = NULL;
WARN(err, "Failed to set submit complete interrupt");
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index ceb48229d14b..54e31d81517b 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -156,9 +156,9 @@ static unsigned int show_channel_command(struct output *o, u32 val,
}
}
-static void show_gather(struct output *o, phys_addr_t phys_addr,
+static void show_gather(struct output *o, dma_addr_t phys_addr,
unsigned int words, struct host1x_cdma *cdma,
- phys_addr_t pin_addr, u32 *map_addr)
+ dma_addr_t pin_addr, u32 *map_addr)
{
/* Map dmaget cursor to corresponding mem handle */
u32 offset = phys_addr - pin_addr;
@@ -176,11 +176,11 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
}
for (i = 0; i < words; i++) {
- u32 addr = phys_addr + i * 4;
+ dma_addr_t addr = phys_addr + i * 4;
u32 val = *(map_addr + offset / 4 + i);
if (!data_count) {
- host1x_debug_output(o, "%08x: %08x: ", addr, val);
+ host1x_debug_output(o, " %pad: %08x: ", &addr, val);
data_count = show_channel_command(o, val, &payload);
} else {
host1x_debug_cont(o, "%08x%s", val,
@@ -195,23 +195,25 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
struct push_buffer *pb = &cdma->push_buffer;
struct host1x_job *job;
- host1x_debug_output(o, "PUSHBUF at %pad, %u words\n",
- &pb->dma, pb->size / 4);
-
- show_gather(o, pb->dma, pb->size / 4, cdma, pb->dma, pb->mapped);
-
list_for_each_entry(job, &cdma->sync_queue, list) {
unsigned int i;
- host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
- job, job->syncpt->id, job->syncpt_end,
- job->first_get, job->timeout,
+ host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n",
+ job->syncpt->id, job->syncpt_end, job->timeout,
job->num_slots, job->num_unpins);
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma,
+ pb->dma + job->first_get, pb->mapped + job->first_get);
+
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
u32 *mapped;
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
+
if (job->gather_copy_mapped)
mapped = (u32 *)job->gather_copy_mapped;
else
@@ -222,7 +224,7 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
continue;
}
- host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n",
+ host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n",
&g->base, g->offset, g->words);
show_gather(o, g->base + g->offset, g->words, cdma,
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
index 02a93305ac7b..85242a59fa6a 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -16,10 +16,13 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
struct output *o)
{
struct host1x_cdma *cdma = &ch->cdma;
+ dma_addr_t dmastart, dmaend;
u32 dmaput, dmaget, dmactrl;
u32 cbstat, cbread;
u32 val, base, baseval;
+ dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+ dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
@@ -56,9 +59,10 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat),
cbread);
- host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+ host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+ host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
dmaput, dmaget, dmactrl);
- host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
+ host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat);
show_channel_gathers(o, cdma);
host1x_debug_output(o, "\n");
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
index 6d1b583aa90f..9d0667879a19 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -16,10 +16,23 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
struct output *o)
{
struct host1x_cdma *cdma = &ch->cdma;
+ dma_addr_t dmastart = 0, dmaend = 0;
u32 dmaput, dmaget, dmactrl;
u32 offset, class;
u32 ch_stat;
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+ dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI);
+ dmastart <<= 32;
+#endif
+ dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+ dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI);
+ dmaend <<= 32;
+#endif
+ dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
+
dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
@@ -41,7 +54,8 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
host1x_debug_output(o, "active class %02x, offset %04x\n",
class, offset);
- host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+ host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+ host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
dmaput, dmaget, dmactrl);
host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat);
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
index 4fc51f70496b..0a2ab8f1da6f 100644
--- a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
index 9e84a4adca9f..60c692b92955 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
index aee5a4e32877..2fcc9a2ad3ef 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
index c4bacdb7155f..5f831438d19b 100644
--- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
index c74070f3f203..8cd2ef087d5d 100644
--- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 6d1f3c0fdbe7..45b6be927ec4 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -13,6 +13,7 @@
#include <trace/events/host1x.h>
#include "channel.h"
#include "dev.h"
+#include "fence.h"
#include "intr.h"
/* Wait list management */
@@ -121,12 +122,20 @@ static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
wake_up_interruptible(wq);
}
+static void action_signal_fence(struct host1x_waitlist *waiter)
+{
+ struct host1x_syncpt_fence *f = waiter->data;
+
+ host1x_fence_signal(f);
+}
+
typedef void (*action_handler)(struct host1x_waitlist *waiter);
static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
action_submit_complete,
action_wakeup,
action_wakeup_interruptible,
+ action_signal_fence,
};
static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index 6ea55e615e3a..e4c346099273 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -33,6 +33,8 @@ enum host1x_intr_action {
*/
HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
+ HOST1X_INTR_ACTION_SIGNAL_FENCE,
+
HOST1X_INTR_ACTION_COUNT
};
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index adbdc225de8d..0eef6df7c89e 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -24,21 +24,25 @@
#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
- u32 num_cmdbufs, u32 num_relocs)
+ u32 num_cmdbufs, u32 num_relocs,
+ bool skip_firewall)
{
struct host1x_job *job = NULL;
unsigned int num_unpins = num_relocs;
+ bool enable_firewall;
u64 total;
void *mem;
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;
+
+ if (!enable_firewall)
num_unpins += num_cmdbufs;
/* Check that we're not going to overflow */
total = sizeof(struct host1x_job) +
(u64)num_relocs * sizeof(struct host1x_reloc) +
(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
- (u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
+ (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
(u64)num_unpins * sizeof(dma_addr_t) +
(u64)num_unpins * sizeof(u32 *);
if (total > ULONG_MAX)
@@ -48,6 +52,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
if (!job)
return NULL;
+ job->enable_firewall = enable_firewall;
+
kref_init(&job->ref);
job->channel = ch;
@@ -57,8 +63,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
mem += num_relocs * sizeof(struct host1x_reloc);
job->unpins = num_unpins ? mem : NULL;
mem += num_unpins * sizeof(struct host1x_job_unpin_data);
- job->gathers = num_cmdbufs ? mem : NULL;
- mem += num_cmdbufs * sizeof(struct host1x_job_gather);
+ job->cmds = num_cmdbufs ? mem : NULL;
+ mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
job->addr_phys = num_unpins ? mem : NULL;
job->reloc_addr_phys = job->addr_phys;
@@ -79,6 +85,13 @@ static void job_free(struct kref *ref)
{
struct host1x_job *job = container_of(ref, struct host1x_job, ref);
+ if (job->release)
+ job->release(job);
+
+ if (job->waiter)
+ host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
+ job->waiter, false);
+
if (job->syncpt)
host1x_syncpt_put(job->syncpt);
@@ -94,22 +107,38 @@ EXPORT_SYMBOL(host1x_job_put);
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
unsigned int words, unsigned int offset)
{
- struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
+ struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
gather->words = words;
gather->bo = bo;
gather->offset = offset;
- job->num_gathers++;
+ job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
+ bool relative, u32 next_class)
+{
+ struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
+
+ cmd->is_wait = true;
+ cmd->wait.id = id;
+ cmd->wait.threshold = thresh;
+ cmd->wait.next_class = next_class;
+ cmd->wait.relative = relative;
+
+ job->num_cmds++;
+}
+EXPORT_SYMBOL(host1x_job_add_wait);
+
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
struct host1x_client *client = job->client;
struct device *dev = client->dev;
struct host1x_job_gather *g;
struct iommu_domain *domain;
+ struct sg_table *sgt;
unsigned int i;
int err;
@@ -119,7 +148,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocs[i];
dma_addr_t phys_addr, *phys;
- struct sg_table *sgt;
reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) {
@@ -192,20 +220,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
* We will copy gathers BO content later, so there is no need to
* hold and pin them.
*/
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ if (job->enable_firewall)
return 0;
- for (i = 0; i < job->num_gathers; i++) {
+ for (i = 0; i < job->num_cmds; i++) {
size_t gather_size = 0;
struct scatterlist *sg;
- struct sg_table *sgt;
dma_addr_t phys_addr;
unsigned long shift;
struct iova *alloc;
dma_addr_t *phys;
unsigned int j;
- g = &job->gathers[i];
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
+
g->bo = host1x_bo_get(g->bo);
if (!g->bo) {
err = -EINVAL;
@@ -296,7 +327,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
if (cmdbuf != reloc->cmdbuf.bo)
continue;
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+ if (job->enable_firewall) {
target = (u32 *)job->gather_copy_mapped +
reloc->cmdbuf.offset / sizeof(u32) +
g->offset / sizeof(u32);
@@ -538,8 +569,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
fw.num_relocs = job->num_relocs;
fw.class = job->class;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
+
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
size += g->words * sizeof(u32);
}
@@ -561,10 +597,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
job->gather_copy_size = size;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
void *gather;
+ if (job->cmds[i].is_wait)
+ continue;
+ g = &job->cmds[i].gather;
+
/* Copy the gather */
gather = host1x_bo_mmap(g->bo);
memcpy(job->gather_copy_mapped + offset, gather + g->offset,
@@ -600,28 +640,33 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
if (err)
goto out;
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+ if (job->enable_firewall) {
err = copy_gathers(host->dev, job, dev);
if (err)
goto out;
}
/* patch gathers */
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
+
+ if (job->cmds[i].is_wait)
+ continue;
+ g = &job->cmds[i].gather;
/* process each gather mem only once */
if (g->handled)
continue;
/* copy_gathers() sets gathers base if firewall is enabled */
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ if (!job->enable_firewall)
g->base = job->gather_addr_phys[i];
- for (j = i + 1; j < job->num_gathers; j++) {
- if (job->gathers[j].bo == g->bo) {
- job->gathers[j].handled = true;
- job->gathers[j].base = g->base;
+ for (j = i + 1; j < job->num_cmds; j++) {
+ if (!job->cmds[j].is_wait &&
+ job->cmds[j].gather.bo == g->bo) {
+ job->cmds[j].gather.handled = true;
+ job->cmds[j].gather.base = g->base;
}
}
@@ -649,8 +694,7 @@ void host1x_job_unpin(struct host1x_job *job)
struct device *dev = unpin->dev ?: host->dev;
struct sg_table *sgt = unpin->sgt;
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
- unpin->size && host->domain) {
+ if (!job->enable_firewall && unpin->size && host->domain) {
iommu_unmap(host->domain, job->addr_phys[i],
unpin->size);
free_iova(&host->iova,
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 94bc2e4ae241..b4428c5495c9 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -18,6 +18,22 @@ struct host1x_job_gather {
bool handled;
};
+struct host1x_job_wait {
+ u32 id;
+ u32 threshold;
+ u32 next_class;
+ bool relative;
+};
+
+struct host1x_job_cmd {
+ bool is_wait;
+
+ union {
+ struct host1x_job_gather gather;
+ struct host1x_job_wait wait;
+ };
+};
+
struct host1x_job_unpin_data {
struct host1x_bo *bo;
struct sg_table *sgt;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index e648ebbb2027..d198a10848c6 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -407,6 +407,8 @@ static void syncpt_release(struct kref *ref)
atomic_set(&sp->max_val, host1x_syncpt_read(sp));
+ sp->locked = false;
+
mutex_lock(&sp->host->syncpt_mutex);
host1x_syncpt_base_free(sp->base);
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index a6766f8d55ee..95cd29b79d6d 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -40,6 +40,13 @@ struct host1x_syncpt {
/* interrupt data */
struct host1x_syncpt_intr intr;
+
+ /*
+ * If a submission incrementing this syncpoint fails, lock it so that
+ * further submission cannot be made until application has handled the
+ * failure.
+ */
+ bool locked;
};
/* Initialize sync point array */
@@ -115,4 +122,9 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
return sp->id < host1x_syncpt_nb_pts(sp->host);
}
+static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp)
+{
+ sp->locked = true;
+}
+
#endif