Diffstat (limited to 'target/linux/bcm27xx/patches-5.15/950-0742-drm-vc4-hvs-Defer-dlist-slots-deallocation.patch')
-rw-r--r--  target/linux/bcm27xx/patches-5.15/950-0742-drm-vc4-hvs-Defer-dlist-slots-deallocation.patch  409
1 file changed, 0 insertions, 409 deletions
diff --git a/target/linux/bcm27xx/patches-5.15/950-0742-drm-vc4-hvs-Defer-dlist-slots-deallocation.patch b/target/linux/bcm27xx/patches-5.15/950-0742-drm-vc4-hvs-Defer-dlist-slots-deallocation.patch
deleted file mode 100644
index 9fce5fec6b..0000000000
--- a/target/linux/bcm27xx/patches-5.15/950-0742-drm-vc4-hvs-Defer-dlist-slots-deallocation.patch
+++ /dev/null
@@ -1,409 +0,0 @@
-From a09f1129301f85dfb2f800c6dab2a5f8acac8f27 Mon Sep 17 00:00:00 2001
-From: Maxime Ripard <maxime@cerno.tech>
-Date: Thu, 16 Dec 2021 14:54:54 +0100
-Subject: [PATCH] drm/vc4: hvs: Defer dlist slots deallocation
-
-During normal operations, the cursor position update is done through an
-asynchronous plane update, which on the vc4 driver basically just
-modifies the right dlist word to move the plane to the new coordinates.
-
-However, when we have the overscan margins setup, we fall back to a
-regular commit when we are next to the edges. And since that commit
-happens to be on a cursor plane, it's considered a legacy cursor update
-by KMS.
-
-The main difference it makes is that it won't wait for its completion
-(ie, next vblank) before returning. This means if we have multiple
-commits happening in rapid succession, we can have several of them
-happening before the next vblank.
-
-In parallel, our dlist allocation is tied to a CRTC state, and each time
-we do a commit we end up with a new CRTC state, with the previous one
-being freed. This means that we free our previous dlist entry (without
-clearing it, though) every time a new one is being committed.
-
-Now, if we were to have two commits happening before the next vblank, we
-could end up reusing the same dlist entries before the next
-vblank.
-
-Indeed, we would start from an initial state taking, for example, the
-dlist entries 10 to 20, then start a commit taking the entries 20 to 30
-and setting the dlist pointer to 20, and freeing the dlist entries 10 to
-20. However, since we haven't reached vblank yet, the HVS is still using
-the entries 10 to 20.
-
-If we were to make a new commit now, chances are the allocator is going
-to give the 10 to 20 entries back, and we would change their content to
-match the new state. If vblank hasn't happened yet, we just corrupted
-the active dlist entries.
-
-A first attempt to solve this was made by creating an intermediate dlist
-buffer to store the current (ie, as of the last commit) dlist content,
-that we would update each time the HVS is done with a frame. However, if
-the interrupt handler missed the vblank window, we would end up copying
-our intermediate dlist to the hardware one during the composition,
-essentially creating the same issue.
-
-Since making sure that our interrupt handler runs within a fixed,
-constrained, time window would require making Linux a real-time kernel,
-this seems a bit out of scope.
-
-Instead, we can work around our original issue by keeping the dlist
-slots allocation longer. That way, we won't reuse a dlist slot while
-it's still in flight. In order to achieve this, instead of freeing the
-dlist slot when its associated CRTC state is destroyed, we'll queue it
-in a list.
-
-A naive implementation would free the buffers in that queue when we get
-our end of frame interrupt. However, there's still a race since, just
-like in the shadow dlist case, we don't control when the handler for
-that interrupt is going to run. Thus, we can end up with a commit adding
-an old dlist allocation to our queue during the window between our
-actual interrupt and when our handler will run. And since that buffer is
-still being used for the composition of the current frame, we can't free
-it right away, exposing us to the original bug.
-
-Fortunately for us, the hardware provides a frame counter that is
-increased each time the first line of a frame is being generated.
-Associating with each allocation the frame count at which its dlist
-slots are supposed to go away, and then only deallocating buffers whose
-counter is below or equal to the one we read when the deallocation code
-runs, should prevent the above race from occurring.
-
-Signed-off-by: Maxime Ripard <maxime@cerno.tech>
----
- drivers/gpu/drm/vc4/vc4_crtc.c | 10 +-
- drivers/gpu/drm/vc4/vc4_drv.h | 15 ++-
- drivers/gpu/drm/vc4/vc4_hvs.c | 181 ++++++++++++++++++++++++++++++---
- drivers/gpu/drm/vc4/vc4_regs.h | 1 +
- 4 files changed, 184 insertions(+), 23 deletions(-)
-
---- a/drivers/gpu/drm/vc4/vc4_crtc.c
-+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
-@@ -943,14 +943,8 @@ void vc4_crtc_destroy_state(struct drm_c
- struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
- struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
-
-- if (drm_mm_node_allocated(&vc4_state->mm)) {
-- unsigned long flags;
--
-- spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
-- drm_mm_remove_node(&vc4_state->mm);
-- spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
--
-- }
-+ vc4_hvs_mark_dlist_entry_stale(vc4->hvs, vc4_state->mm);
-+ vc4_state->mm = NULL;
-
- drm_atomic_helper_crtc_destroy_state(crtc, state);
- }
---- a/drivers/gpu/drm/vc4/vc4_drv.h
-+++ b/drivers/gpu/drm/vc4/vc4_drv.h
-@@ -335,6 +335,9 @@ struct vc4_hvs {
- struct drm_mm lbm_mm;
- spinlock_t mm_lock;
-
-+ struct list_head stale_dlist_entries;
-+ struct work_struct free_dlist_work;
-+
- struct drm_mm_node mitchell_netravali_filter;
-
- struct debugfs_regset32 regset;
-@@ -573,10 +576,16 @@ struct drm_connector *vc4_get_crtc_conne
- struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc,
- struct drm_crtc_state *state);
-
-+struct vc4_hvs_dlist_allocation {
-+ struct list_head node;
-+ struct drm_mm_node mm_node;
-+ unsigned int channel;
-+ u8 target_frame_count;
-+};
-+
- struct vc4_crtc_state {
- struct drm_crtc_state base;
-- /* Dlist area for this CRTC configuration. */
-- struct drm_mm_node mm;
-+ struct vc4_hvs_dlist_allocation *mm;
- bool txp_armed;
- unsigned int assigned_channel;
-
-@@ -968,6 +977,8 @@ extern struct platform_driver vc4_hvs_dr
- void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int output);
- int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output);
- u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo);
-+void vc4_hvs_mark_dlist_entry_stale(struct vc4_hvs *hvs,
-+ struct vc4_hvs_dlist_allocation *alloc);
- int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state);
- void vc4_hvs_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state);
- void vc4_hvs_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state);
---- a/drivers/gpu/drm/vc4/vc4_hvs.c
-+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
-@@ -315,6 +315,150 @@ static void vc4_hvs_update_gamma_lut(str
- vc4_hvs_lut_load(hvs, vc4_crtc);
- }
-
-+static void vc4_hvs_irq_enable_eof(const struct vc4_hvs *hvs,
-+ unsigned int channel)
-+{
-+ u32 irq_mask = hvs->hvs5 ?
-+ SCALER5_DISPCTRL_DSPEIEOF(channel) :
-+ SCALER_DISPCTRL_DSPEIEOF(channel);
-+
-+ HVS_WRITE(SCALER_DISPCTRL,
-+ HVS_READ(SCALER_DISPCTRL) | irq_mask);
-+}
-+
-+static void vc4_hvs_irq_clear_eof(const struct vc4_hvs *hvs,
-+ unsigned int channel)
-+{
-+ u32 irq_mask = hvs->hvs5 ?
-+ SCALER5_DISPCTRL_DSPEIEOF(channel) :
-+ SCALER_DISPCTRL_DSPEIEOF(channel);
-+
-+ HVS_WRITE(SCALER_DISPCTRL,
-+ HVS_READ(SCALER_DISPCTRL) & ~irq_mask);
-+}
-+
-+static struct vc4_hvs_dlist_allocation *
-+vc4_hvs_alloc_dlist_entry(struct vc4_hvs *hvs,
-+ unsigned int channel,
-+ size_t dlist_count)
-+{
-+ struct vc4_hvs_dlist_allocation *alloc;
-+ unsigned long flags;
-+ int ret;
-+
-+ if (channel == VC4_HVS_CHANNEL_DISABLED)
-+ return NULL;
-+
-+ alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
-+ if (!alloc)
-+ return ERR_PTR(-ENOMEM);
-+
-+ spin_lock_irqsave(&hvs->mm_lock, flags);
-+ ret = drm_mm_insert_node(&hvs->dlist_mm, &alloc->mm_node,
-+ dlist_count);
-+ spin_unlock_irqrestore(&hvs->mm_lock, flags);
-+ if (ret)
-+ return ERR_PTR(ret);
-+
-+ alloc->channel = channel;
-+
-+ return alloc;
-+}
-+
-+void vc4_hvs_mark_dlist_entry_stale(struct vc4_hvs *hvs,
-+ struct vc4_hvs_dlist_allocation *alloc)
-+{
-+ unsigned long flags;
-+ u8 frcnt;
-+
-+ if (!alloc)
-+ return;
-+
-+ if (!drm_mm_node_allocated(&alloc->mm_node))
-+ return;
-+
-+ frcnt = vc4_hvs_get_fifo_frame_count(hvs, alloc->channel);
-+ alloc->target_frame_count = (frcnt + 1) & ((1 << 6) - 1);
-+
-+ spin_lock_irqsave(&hvs->mm_lock, flags);
-+
-+ list_add_tail(&alloc->node, &hvs->stale_dlist_entries);
-+
-+ HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_EOF(alloc->channel));
-+ vc4_hvs_irq_enable_eof(hvs, alloc->channel);
-+
-+ spin_unlock_irqrestore(&hvs->mm_lock, flags);
-+}
-+
-+static void vc4_hvs_schedule_dlist_sweep(struct vc4_hvs *hvs,
-+ unsigned int channel)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&hvs->mm_lock, flags);
-+
-+ if (!list_empty(&hvs->stale_dlist_entries))
-+ queue_work(system_unbound_wq, &hvs->free_dlist_work);
-+
-+ vc4_hvs_irq_clear_eof(hvs, channel);
-+
-+ spin_unlock_irqrestore(&hvs->mm_lock, flags);
-+}
-+
-+/*
-+ * Frame counts are essentially 6-bit sequence numbers, so we can use
-+ * sequence number arithmetic, following RFC 1982, to implement a
-+ * proper comparison between them.
-+ */
-+static bool vc4_hvs_frcnt_lte(u8 cnt1, u8 cnt2)
-+{
-+ return (s8)((cnt1 << 2) - (cnt2 << 2)) <= 0;
-+}
-+
-+/*
-+ * Some atomic commits (legacy cursor updates, mostly) will not wait for
-+ * the next vblank and will just return once the commit has been pushed
-+ * to the hardware.
-+ *
-+ * On the hardware side, our HVS stores the planes' parameters in its
-+ * context RAM, and will use part of the RAM to store data during the
-+ * frame rendering.
-+ *
-+ * This interacts badly if we get multiple commits before the next
-+ * vblank since we could end up overwriting the DLIST entries used by
-+ * previous commits if our dlist allocation reuses that entry. In such a
-+ * case, we would overwrite the data currently being used by the
-+ * hardware, resulting in a corrupted frame.
-+ *
-+ * In order to work around this, we'll queue the dlist entries in a list
-+ * once the associated CRTC state is destroyed. The HVS only allows us
-+ * to know which entry is currently active, but not which ones are no
-+ * longer being used, so in order to avoid freeing entries that are
-+ * still used by the hardware we add a guesstimate of the frame count
-+ * at which our entry will no longer be used, and only free those
-+ * entries once we have reached that frame count.
-+ */
-+static void vc4_hvs_dlist_free_work(struct work_struct *work)
-+{
-+ struct vc4_hvs *hvs = container_of(work, struct vc4_hvs, free_dlist_work);
-+ struct vc4_hvs_dlist_allocation *cur, *next;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&hvs->mm_lock, flags);
-+ list_for_each_entry_safe(cur, next, &hvs->stale_dlist_entries, node) {
-+ u8 frcnt;
-+
-+ frcnt = vc4_hvs_get_fifo_frame_count(hvs, cur->channel);
-+ if (!vc4_hvs_frcnt_lte(cur->target_frame_count, frcnt))
-+ continue;
-+
-+ list_del(&cur->node);
-+ drm_mm_remove_node(&cur->mm_node);
-+ kfree(cur);
-+ }
-+ spin_unlock_irqrestore(&hvs->mm_lock, flags);
-+}
-+
- u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo)
- {
- u8 field = 0;
-@@ -588,13 +732,12 @@ int vc4_hvs_atomic_check(struct drm_crtc
- {
- struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
- struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
-+ struct vc4_hvs_dlist_allocation *alloc;
- struct drm_device *dev = crtc->dev;
- struct vc4_dev *vc4 = to_vc4_dev(dev);
- struct drm_plane *plane;
-- unsigned long flags;
- const struct drm_plane_state *plane_state;
- u32 dlist_count = 0;
-- int ret;
-
- /* The pixelvalve can only feed one encoder (and encoders are
- * 1:1 with connectors.)
-@@ -607,12 +750,11 @@ int vc4_hvs_atomic_check(struct drm_crtc
-
- dlist_count++; /* Account for SCALER_CTL0_END. */
-
-- spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
-- ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm,
-- dlist_count);
-- spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
-- if (ret)
-- return ret;
-+ alloc = vc4_hvs_alloc_dlist_entry(vc4->hvs, vc4_state->assigned_channel, dlist_count);
-+ if (IS_ERR(alloc))
-+ return PTR_ERR(alloc);
-+
-+ vc4_state->mm = alloc;
-
- return vc4_hvs_gamma_check(crtc, state);
- }
-@@ -624,8 +766,9 @@ static void vc4_hvs_install_dlist(struct
- struct vc4_hvs *hvs = vc4->hvs;
- struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
-
-+ WARN_ON(!vc4_state->mm);
- HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel),
-- vc4_state->mm.start);
-+ vc4_state->mm->mm_node.start);
- }
-
- static void vc4_hvs_update_dlist(struct drm_crtc *crtc)
-@@ -650,8 +793,10 @@ static void vc4_hvs_update_dlist(struct
- spin_unlock_irqrestore(&dev->event_lock, flags);
- }
-
-+ WARN_ON(!vc4_state->mm);
-+
- spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
-- vc4_crtc->current_dlist = vc4_state->mm.start;
-+ vc4_crtc->current_dlist = vc4_state->mm->mm_node.start;
- spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
- }
-
-@@ -708,8 +853,7 @@ void vc4_hvs_atomic_flush(struct drm_crt
- struct vc4_plane_state *vc4_plane_state;
- bool debug_dump_regs = false;
- bool enable_bg_fill = false;
-- u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start;
-- u32 __iomem *dlist_next = dlist_start;
-+ u32 __iomem *dlist_start, *dlist_next;
-
- if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED)
- return;
-@@ -719,6 +863,9 @@ void vc4_hvs_atomic_flush(struct drm_crt
- vc4_hvs_dump_state(hvs);
- }
-
-+ dlist_start = vc4->hvs->dlist + vc4_state->mm->mm_node.start;
-+ dlist_next = dlist_start;
-+
- /* Copy all the active planes' dlist contents to the hardware dlist. */
- drm_atomic_crtc_for_each_plane(plane, crtc) {
- /* Is this the first active plane? */
-@@ -741,7 +888,8 @@ void vc4_hvs_atomic_flush(struct drm_crt
- writel(SCALER_CTL0_END, dlist_next);
- dlist_next++;
-
-- WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
-+ WARN_ON(!vc4_state->mm);
-+ WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm->mm_node.size);
-
- if (enable_bg_fill)
- /* This sets a black background color fill, as is the case
-@@ -846,6 +994,11 @@ static irqreturn_t vc4_hvs_irq_handler(i
-
- irqret = IRQ_HANDLED;
- }
-+
-+ if (status & SCALER_DISPSTAT_EOF(channel)) {
-+ vc4_hvs_schedule_dlist_sweep(hvs, channel);
-+ irqret = IRQ_HANDLED;
-+ }
- }
-
- /* Clear every per-channel interrupt flag. */
-@@ -903,6 +1056,8 @@ static int vc4_hvs_bind(struct device *d
- hvs->dlist = hvs->regs + SCALER5_DLIST_START;
-
- spin_lock_init(&hvs->mm_lock);
-+ INIT_LIST_HEAD(&hvs->stale_dlist_entries);
-+ INIT_WORK(&hvs->free_dlist_work, vc4_hvs_dlist_free_work);
-
- /* Set up the HVS display list memory manager. We never
- * overwrite the setup from the bootloader (just 128b out of
---- a/drivers/gpu/drm/vc4/vc4_regs.h
-+++ b/drivers/gpu/drm/vc4/vc4_regs.h
-@@ -240,6 +240,7 @@
- # define SCALER_DISPCTRL_DSPEIEOLN(x) BIT(8 + ((x) * 2))
- /* Enables Display 0 EOF contribution to SCALER_DISPSTAT_IRQDISP0 */
- # define SCALER_DISPCTRL_DSPEIEOF(x) BIT(7 + ((x) * 2))
-+# define SCALER5_DISPCTRL_DSPEIEOF(x) BIT(7 + ((x) * 4))
-
- # define SCALER_DISPCTRL_SLVRDEIRQ BIT(6)
- # define SCALER_DISPCTRL_SLVWREIRQ BIT(5)
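
For reference, vc4_hvs_frcnt_lte() in the removed patch compares the HVS
6-bit frame counters as RFC 1982 serial numbers: both values are shifted
into the top bits of a signed byte so the wrap-around at 64 falls out of
ordinary signed arithmetic. A minimal standalone sketch of that comparison
(plain C, with a hypothetical frcnt_lte() helper and test harness, not part
of the kernel source) could look like this:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True if cnt1 <= cnt2 modulo the 6-bit (0..63) counter wrap-around. */
static bool frcnt_lte(uint8_t cnt1, uint8_t cnt2)
{
        /*
         * Shift both 6-bit counts into the top of a signed byte and
         * subtract: the sign of the difference gives the ordering even
         * across a wrap, as long as the two counts are less than half
         * the sequence space (32 frames) apart.
         */
        return (int8_t)((cnt1 << 2) - (cnt2 << 2)) <= 0;
}

int main(void)
{
        printf("%d\n", frcnt_lte(5, 10));  /* 1: 5 comes before 10 */
        printf("%d\n", frcnt_lte(62, 1));  /* 1: the counter wrapped from 63 to 0 */
        printf("%d\n", frcnt_lte(10, 5));  /* 0: 10 is ahead of 5 */
        return 0;
}

The dlist sweep work in the patch uses this ordering to decide whether the
frame a stale allocation was last used for has already scanned out, and
therefore whether its drm_mm node can be removed safely.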