summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c99
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.h16
-rw-r--r--drivers/gpu/drm/vc4/vc4_gem.c40
-rw-r--r--drivers/gpu/drm/vc4/vc4_kms.c149
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c40
5 files changed, 342 insertions, 2 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 7a9f4768591e..a3193321df39 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -35,6 +35,7 @@
#include "drm_atomic_helper.h"
#include "drm_crtc_helper.h"
#include "linux/clk.h"
+#include "drm_fb_cma_helper.h"
#include "linux/component.h"
#include "linux/of_device.h"
#include "vc4_drv.h"
@@ -475,10 +476,106 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
return ret;
}
+struct vc4_async_flip_state {
+ struct drm_crtc *crtc;
+ struct drm_framebuffer *fb;
+ struct drm_pending_vblank_event *event;
+
+ struct vc4_seqno_cb cb;
+};
+
+/* Called when the V3D execution for the BO being flipped to is done, so that
+ * we can actually update the plane's address to point to it.
+ */
+static void
+vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
+{
+ struct vc4_async_flip_state *flip_state =
+ container_of(cb, struct vc4_async_flip_state, cb);
+ struct drm_crtc *crtc = flip_state->crtc;
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_plane *plane = crtc->primary;
+
+ vc4_plane_async_set_fb(plane, flip_state->fb);
+ if (flip_state->event) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+ drm_crtc_send_vblank_event(crtc, flip_state->event);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
+
+ drm_framebuffer_unreference(flip_state->fb);
+ kfree(flip_state);
+
+ up(&vc4->async_modeset);
+}
+
+/* Implements async (non-vblank-synced) page flips.
+ *
+ * The page flip ioctl needs to return immediately, so we grab the
+ * modeset semaphore on the pipe, and queue the address update for
+ * when V3D is done with the BO being flipped to.
+ */
+static int vc4_async_page_flip(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t flags)
+{
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_plane *plane = crtc->primary;
+ int ret = 0;
+ struct vc4_async_flip_state *flip_state;
+ struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
+ struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
+ flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
+ if (!flip_state)
+ return -ENOMEM;
+
+ drm_framebuffer_reference(fb);
+ flip_state->fb = fb;
+ flip_state->crtc = crtc;
+ flip_state->event = event;
+
+ /* Make sure all other async modesetes have landed. */
+ ret = down_interruptible(&vc4->async_modeset);
+ if (ret) {
+ kfree(flip_state);
+ return ret;
+ }
+
+ /* Immediately update the plane's legacy fb pointer, so that later
+ * modeset prep sees the state that will be present when the semaphore
+ * is released.
+ */
+ drm_atomic_set_fb_for_plane(plane->state, fb);
+ plane->fb = fb;
+
+ vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
+ vc4_async_page_flip_complete);
+
+ /* Driver takes ownership of state on successful async commit. */
+ return 0;
+}
+
+static int vc4_page_flip(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t flags)
+{
+ if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
+ return vc4_async_page_flip(crtc, fb, event, flags);
+ else
+ return drm_atomic_helper_page_flip(crtc, fb, event, flags);
+}
+
static const struct drm_crtc_funcs vc4_crtc_funcs = {
.set_config = drm_atomic_helper_set_config,
.destroy = vc4_crtc_destroy,
- .page_flip = drm_atomic_helper_page_flip,
+ .page_flip = vc4_page_flip,
.set_property = NULL,
.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 0bc8c57196ac..f9927d87369f 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -76,6 +76,11 @@ struct vc4_dev {
wait_queue_head_t job_wait_queue;
struct work_struct job_done_work;
+ /* List of struct vc4_seqno_cb for callbacks to be made from a
+ * workqueue when the given seqno is passed.
+ */
+ struct list_head seqno_cb_list;
+
/* The binner overflow memory that's currently set up in
* BPOA/BPOS registers. When overflow occurs and a new one is
* allocated, the previous one will be moved to
@@ -128,6 +133,12 @@ to_vc4_bo(struct drm_gem_object *bo)
return (struct vc4_bo *)bo;
}
+struct vc4_seqno_cb {
+ struct work_struct work;
+ uint64_t seqno;
+ void (*func)(struct vc4_seqno_cb *cb);
+};
+
struct vc4_v3d {
struct platform_device *pdev;
void __iomem *regs;
@@ -384,6 +395,9 @@ void vc4_submit_next_job(struct drm_device *dev);
int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
uint64_t timeout_ns, bool interruptible);
void vc4_job_handle_completed(struct vc4_dev *vc4);
+int vc4_queue_seqno_cb(struct drm_device *dev,
+ struct vc4_seqno_cb *cb, uint64_t seqno,
+ void (*func)(struct vc4_seqno_cb *cb));
/* vc4_hdmi.c */
extern struct platform_driver vc4_hdmi_driver;
@@ -409,6 +423,8 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
enum drm_plane_type type);
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
u32 vc4_plane_dlist_size(struct drm_plane_state *state);
+void vc4_plane_async_set_fb(struct drm_plane *plane,
+ struct drm_framebuffer *fb);
/* vc4_v3d.c */
extern struct platform_driver vc4_v3d_driver;
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 936dddfa890f..5fb0556e001e 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -461,6 +461,7 @@ void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
unsigned long irqflags;
+ struct vc4_seqno_cb *cb, *cb_temp;
spin_lock_irqsave(&vc4->job_lock, irqflags);
while (!list_empty(&vc4->job_done_list)) {
@@ -473,7 +474,45 @@ vc4_job_handle_completed(struct vc4_dev *vc4)
vc4_complete_exec(vc4->dev, exec);
spin_lock_irqsave(&vc4->job_lock, irqflags);
}
+
+ list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
+ if (cb->seqno <= vc4->finished_seqno) {
+ list_del_init(&cb->work.entry);
+ schedule_work(&cb->work);
+ }
+ }
+
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+static void vc4_seqno_cb_work(struct work_struct *work)
+{
+ struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
+
+ cb->func(cb);
+}
+
+int vc4_queue_seqno_cb(struct drm_device *dev,
+ struct vc4_seqno_cb *cb, uint64_t seqno,
+ void (*func)(struct vc4_seqno_cb *cb))
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int ret = 0;
+ unsigned long irqflags;
+
+ cb->func = func;
+ INIT_WORK(&cb->work, vc4_seqno_cb_work);
+
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
+ if (seqno > vc4->finished_seqno) {
+ cb->seqno = seqno;
+ list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
+ } else {
+ schedule_work(&cb->work);
+ }
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+ return ret;
}
/* Scheduled when any job has been completed, this walks the list of
@@ -610,6 +649,7 @@ vc4_gem_init(struct drm_device *dev)
INIT_LIST_HEAD(&vc4->job_list);
INIT_LIST_HEAD(&vc4->job_done_list);
+ INIT_LIST_HEAD(&vc4->seqno_cb_list);
spin_lock_init(&vc4->job_lock);
INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 2e5597d10cc6..f95f2df5f8d1 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -15,6 +15,7 @@
*/
#include "drm_crtc.h"
+#include "drm_atomic.h"
#include "drm_atomic_helper.h"
#include "drm_crtc_helper.h"
#include "drm_plane_helper.h"
@@ -29,10 +30,152 @@ static void vc4_output_poll_changed(struct drm_device *dev)
drm_fbdev_cma_hotplug_event(vc4->fbdev);
}
+struct vc4_commit {
+ struct drm_device *dev;
+ struct drm_atomic_state *state;
+ struct vc4_seqno_cb cb;
+};
+
+static void
+vc4_atomic_complete_commit(struct vc4_commit *c)
+{
+ struct drm_atomic_state *state = c->state;
+ struct drm_device *dev = state->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ drm_atomic_helper_commit_modeset_disables(dev, state);
+
+ drm_atomic_helper_commit_planes(dev, state, false);
+
+ drm_atomic_helper_commit_modeset_enables(dev, state);
+
+ drm_atomic_helper_wait_for_vblanks(dev, state);
+
+ drm_atomic_helper_cleanup_planes(dev, state);
+
+ drm_atomic_state_free(state);
+
+ up(&vc4->async_modeset);
+
+ kfree(c);
+}
+
+static void
+vc4_atomic_complete_commit_seqno_cb(struct vc4_seqno_cb *cb)
+{
+ struct vc4_commit *c = container_of(cb, struct vc4_commit, cb);
+
+ vc4_atomic_complete_commit(c);
+}
+
+static struct vc4_commit *commit_init(struct drm_atomic_state *state)
+{
+ struct vc4_commit *c = kzalloc(sizeof(*c), GFP_KERNEL);
+
+ if (!c)
+ return NULL;
+ c->dev = state->dev;
+ c->state = state;
+
+ return c;
+}
+
+/**
+ * vc4_atomic_commit - commit validated state object
+ * @dev: DRM device
+ * @state: the driver state object
+ * @async: asynchronous commit
+ *
+ * This function commits a with drm_atomic_helper_check() pre-validated state
+ * object. This can still fail when e.g. the framebuffer reservation fails. For
+ * now this doesn't implement asynchronous commits.
+ *
+ * RETURNS
+ * Zero for success or -errno.
+ */
+static int vc4_atomic_commit(struct drm_device *dev,
+ struct drm_atomic_state *state,
+ bool async)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ int ret;
+ int i;
+ uint64_t wait_seqno = 0;
+ struct vc4_commit *c;
+
+ c = commit_init(state);
+ if (!c)
+ return -ENOMEM;
+
+ /* Make sure that any outstanding modesets have finished. */
+ ret = down_interruptible(&vc4->async_modeset);
+ if (ret) {
+ kfree(c);
+ return ret;
+ }
+
+ ret = drm_atomic_helper_prepare_planes(dev, state);
+ if (ret) {
+ kfree(c);
+ up(&vc4->async_modeset);
+ return ret;
+ }
+
+ for (i = 0; i < dev->mode_config.num_total_plane; i++) {
+ struct drm_plane *plane = state->planes[i];
+ struct drm_plane_state *new_state = state->plane_states[i];
+
+ if (!plane)
+ continue;
+
+ if ((plane->state->fb != new_state->fb) && new_state->fb) {
+ struct drm_gem_cma_object *cma_bo =
+ drm_fb_cma_get_gem_obj(new_state->fb, 0);
+ struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
+ wait_seqno = max(bo->seqno, wait_seqno);
+ }
+ }
+
+ /*
+ * This is the point of no return - everything below never fails except
+ * when the hw goes bonghits. Which means we can commit the new state on
+ * the software side now.
+ */
+
+ drm_atomic_helper_swap_state(dev, state);
+
+ /*
+ * Everything below can be run asynchronously without the need to grab
+ * any modeset locks at all under one condition: It must be guaranteed
+ * that the asynchronous work has either been cancelled (if the driver
+ * supports it, which at least requires that the framebuffers get
+ * cleaned up with drm_atomic_helper_cleanup_planes()) or completed
+ * before the new state gets committed on the software side with
+ * drm_atomic_helper_swap_state().
+ *
+ * This scheme allows new atomic state updates to be prepared and
+ * checked in parallel to the asynchronous completion of the previous
+ * update. Which is important since compositors need to figure out the
+ * composition of the next frame right after having submitted the
+ * current layout.
+ */
+
+ if (async) {
+ vc4_queue_seqno_cb(dev, &c->cb, wait_seqno,
+ vc4_atomic_complete_commit_seqno_cb);
+ } else {
+ vc4_wait_for_seqno(dev, wait_seqno, ~0ull, false);
+ vc4_atomic_complete_commit(c);
+ }
+
+ return 0;
+}
+
static const struct drm_mode_config_funcs vc4_mode_funcs = {
.output_poll_changed = vc4_output_poll_changed,
.atomic_check = drm_atomic_helper_check,
- .atomic_commit = drm_atomic_helper_commit,
+ .atomic_commit = vc4_atomic_commit,
.fb_create = drm_fb_cma_create,
};
@@ -41,6 +184,8 @@ int vc4_kms_load(struct drm_device *dev)
struct vc4_dev *vc4 = to_vc4_dev(dev);
int ret;
+ sema_init(&vc4->async_modeset, 1);
+
ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
if (ret < 0) {
dev_err(dev->dev, "failed to initialize vblank\n");
@@ -51,6 +196,8 @@ int vc4_kms_load(struct drm_device *dev)
dev->mode_config.max_height = 2048;
dev->mode_config.funcs = &vc4_mode_funcs;
dev->mode_config.preferred_depth = 24;
+ dev->mode_config.async_page_flip = true;
+
dev->vblank_disable_allowed = true;
drm_mode_config_reset(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index cdd8b10c0147..db32c37335b1 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -29,6 +29,14 @@ struct vc4_plane_state {
u32 *dlist;
u32 dlist_size; /* Number of dwords in allocated for the display list */
u32 dlist_count; /* Number of used dwords in the display list. */
+
+ /* Offset in the dlist to pointer word 0. */
+ u32 pw0_offset;
+
+ /* Offset where the plane's dlist was last stored in the
+ hardware at vc4_crtc_atomic_flush() time.
+ */
+ u32 *hw_dlist;
};
static inline struct vc4_plane_state *
@@ -197,6 +205,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
/* Position Word 3: Context. Written by the HVS. */
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+ vc4_state->pw0_offset = vc4_state->dlist_count;
+
/* Pointer Word 0: RGB / Y Pointer */
vc4_dlist_write(vc4_state, bo->paddr + offset);
@@ -248,6 +258,8 @@ u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
int i;
+ vc4_state->hw_dlist = dlist;
+
/* Can't memcpy_toio() because it needs to be 32-bit writes. */
for (i = 0; i < vc4_state->dlist_count; i++)
writel(vc4_state->dlist[i], &dlist[i]);
@@ -262,6 +274,34 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state)
return vc4_state->dlist_count;
}
+/* Updates the plane to immediately (well, once the FIFO needs
+ * refilling) scan out from at a new framebuffer.
+ */
+void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
+{
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
+ struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
+ uint32_t addr;
+
+ /* We're skipping the address adjustment for negative origin,
+ * because this is only called on the primary plane.
+ */
+ WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
+ addr = bo->paddr + fb->offsets[0];
+
+ /* Write the new address into the hardware immediately. The
+ * scanout will start from this address as soon as the FIFO
+ * needs to refill with pixels.
+ */
+ writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]);
+
+ /* Also update the CPU-side dlist copy, so that any later
+ * atomic updates that don't do a new modeset on our plane
+ * also use our updated address.
+ */
+ vc4_state->dlist[vc4_state->pw0_offset] = addr;
+}
+
static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
.prepare_fb = NULL,
.cleanup_fb = NULL,