summaryrefslogtreecommitdiffstats
path: root/target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch')
-rw-r--r--target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch2139
1 files changed, 2139 insertions, 0 deletions
diff --git a/target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch b/target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch
new file mode 100644
index 0000000000..9659432294
--- /dev/null
+++ b/target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch
@@ -0,0 +1,2139 @@
+From e84da235223d0209165183c430692dde5c69854c Mon Sep 17 00:00:00 2001
+From: Maxime Ripard <maxime@cerno.tech>
+Date: Fri, 17 Feb 2023 15:25:16 +0100
+Subject: [PATCH] drm/vc4: hvs: Support BCM2712 HVS
+
+The HVS found in the BCM2712, while having a similar role, is very
+different from the one found in the previous SoCs. Indeed, the register
+layout is fairly different, and the DLIST format is new as well.
+
+Let's introduce the needed functions to support the new HVS.
+
+Signed-off-by: Maxime Ripard <maxime@cerno.tech>
+---
+ drivers/gpu/drm/vc4/vc4_crtc.c | 47 ++-
+ drivers/gpu/drm/vc4/vc4_drv.c | 8 +-
+ drivers/gpu/drm/vc4/vc4_drv.h | 18 +
+ drivers/gpu/drm/vc4/vc4_hvs.c | 626 ++++++++++++++++++++++++++++---
+ drivers/gpu/drm/vc4/vc4_kms.c | 102 ++++-
+ drivers/gpu/drm/vc4/vc4_plane.c | 641 +++++++++++++++++++++++++++++++-
+ drivers/gpu/drm/vc4/vc4_regs.h | 181 +++++++++
+ 7 files changed, 1540 insertions(+), 83 deletions(-)
+
+--- a/drivers/gpu/drm/vc4/vc4_crtc.c
++++ b/drivers/gpu/drm/vc4/vc4_crtc.c
+@@ -82,13 +82,22 @@ static unsigned int
+ vc4_crtc_get_cob_allocation(struct vc4_dev *vc4, unsigned int channel)
+ {
+ struct vc4_hvs *hvs = vc4->hvs;
+- u32 dispbase = HVS_READ(SCALER_DISPBASEX(channel));
++ u32 dispbase, top, base;
++
+ /* Top/base are supposed to be 4-pixel aligned, but the
+ * Raspberry Pi firmware fills the low bits (which are
+ * presumably ignored).
+ */
+- u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
+- u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
++
++ if (vc4->gen >= VC4_GEN_6) {
++ dispbase = HVS_READ(SCALER6_DISPX_COB(channel));
++ top = VC4_GET_FIELD(dispbase, SCALER6_DISPX_COB_TOP) & ~3;
++ base = VC4_GET_FIELD(dispbase, SCALER6_DISPX_COB_BASE) & ~3;
++ } else {
++ dispbase = HVS_READ(SCALER_DISPBASEX(channel));
++ top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
++ base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
++ }
+
+ return top - base + 4;
+ }
+@@ -121,7 +130,10 @@ static bool vc4_crtc_get_scanout_positio
+ * Read vertical scanline which is currently composed for our
+ * pixelvalve by the HVS, and also the scaler status.
+ */
+- val = HVS_READ(SCALER_DISPSTATX(channel));
++ if (vc4->gen >= VC4_GEN_6)
++ val = HVS_READ(SCALER6_DISPX_STATUS(channel));
++ else
++ val = HVS_READ(SCALER_DISPSTATX(channel));
+
+ /* Get optional system timestamp after query. */
+ if (etime)
+@@ -130,7 +142,12 @@ static bool vc4_crtc_get_scanout_positio
+ /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+
+ /* Vertical position of hvs composed scanline. */
+- *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
++
++ if (vc4->gen >= VC4_GEN_6)
++ *vpos = VC4_GET_FIELD(val, SCALER6_DISPX_STATUS_YLINE);
++ else
++ *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
++
+ *hpos = 0;
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+@@ -475,8 +492,10 @@ static void require_hvs_enabled(struct d
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_hvs *hvs = vc4->hvs;
+
+- WARN_ON_ONCE((HVS_READ(SCALER_DISPCTRL) & SCALER_DISPCTRL_ENABLE) !=
+- SCALER_DISPCTRL_ENABLE);
++ if (vc4->gen >= VC4_GEN_6)
++ WARN_ON_ONCE(!(HVS_READ(SCALER6_CONTROL) & SCALER6_CONTROL_HVS_EN));
++ else
++ WARN_ON_ONCE(!(HVS_READ(SCALER_DISPCTRL) & SCALER_DISPCTRL_ENABLE));
+ }
+
+ static int vc4_crtc_disable(struct drm_crtc *crtc,
+@@ -804,14 +823,21 @@ static void vc4_crtc_handle_page_flip(st
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_hvs *hvs = vc4->hvs;
++ unsigned int current_dlist;
+ u32 chan = vc4_crtc->current_hvs_channel;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+ spin_lock(&vc4_crtc->irq_lock);
++
++ if (vc4->gen >= VC4_GEN_6)
++ current_dlist = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_DL(chan)),
++ SCALER6_DISPX_DL_LACT);
++ else
++ current_dlist = HVS_READ(SCALER_DISPLACTX(chan));
++
+ if (vc4_crtc->event &&
+- (vc4_crtc->current_dlist == HVS_READ(SCALER_DISPLACTX(chan)) ||
+- vc4_crtc->feeds_txp)) {
++ (vc4_crtc->current_dlist == current_dlist || vc4_crtc->feeds_txp)) {
+ drm_crtc_send_vblank_event(crtc, vc4_crtc->event);
+ vc4_crtc->event = NULL;
+ drm_crtc_vblank_put(crtc);
+@@ -822,7 +848,8 @@ static void vc4_crtc_handle_page_flip(st
+ * the CRTC and encoder already reconfigured, leading to
+ * underruns. This can be seen when reconfiguring the CRTC.
+ */
+- vc4_hvs_unmask_underrun(hvs, chan);
++ if (vc4->gen < VC4_GEN_6)
++ vc4_hvs_unmask_underrun(hvs, chan);
+ }
+ spin_unlock(&vc4_crtc->irq_lock);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+--- a/drivers/gpu/drm/vc4/vc4_drv.c
++++ b/drivers/gpu/drm/vc4/vc4_drv.c
+@@ -277,6 +277,7 @@ static const struct of_device_id vc4_dma
+ { .compatible = "brcm,bcm2711-hvs" },
+ { .compatible = "brcm,bcm2835-hvs" },
+ { .compatible = "brcm,bcm2711-hvs" },
++ { .compatible = "brcm,bcm2712-hvs" },
+ { .compatible = "raspberrypi,rpi-firmware-kms" },
+ { .compatible = "brcm,bcm2835-v3d" },
+ { .compatible = "brcm,cygnus-v3d" },
+@@ -308,8 +309,6 @@ static int vc4_drm_bind(struct device *d
+ enum vc4_gen gen;
+ int ret = 0;
+
+- dev->coherent_dma_mask = DMA_BIT_MASK(32);
+-
+ if (of_device_is_compatible(dev->of_node, "brcm,bcm2712-vc6"))
+ gen = VC4_GEN_6;
+ else if (of_device_is_compatible(dev->of_node, "brcm,bcm2711-vc5"))
+@@ -322,6 +321,11 @@ static int vc4_drm_bind(struct device *d
+ else
+ driver = &vc4_drm_driver;
+
++ if (gen >= VC4_GEN_6)
++ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36));
++ else
++ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
++
+ node = of_find_matching_node_and_match(NULL, vc4_dma_range_matches,
+ NULL);
+ if (node) {
+--- a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -345,8 +345,10 @@ struct vc4_hvs {
+ unsigned int dlist_mem_size;
+
+ struct clk *core_clk;
++ struct clk *disp_clk;
+
+ struct {
++ unsigned int desc;
+ unsigned int enabled: 1;
+ } eof_irq[HVS_NUM_CHANNELS];
+
+@@ -358,6 +360,11 @@ struct vc4_hvs {
+ struct drm_mm dlist_mm;
+ /* Memory manager for the LBM memory used by HVS scaling. */
+ struct drm_mm lbm_mm;
++
++ /* Memory manager for the UPM memory used for prefetching. */
++ struct drm_mm upm_mm;
++ struct ida upm_handles;
++
+ spinlock_t mm_lock;
+
+ struct list_head stale_dlist_entries;
+@@ -382,6 +389,8 @@ struct vc4_hvs {
+ bool vc5_hdmi_enable_4096by2160;
+ };
+
++#define HVS_UBM_WORD_SIZE 256
++
+ struct vc4_hvs_state {
+ struct drm_private_state base;
+ unsigned long core_clock_rate;
+@@ -456,6 +465,15 @@ struct vc4_plane_state {
+ /* Our allocation in LBM for temporary storage during scaling. */
+ struct drm_mm_node lbm;
+
++ /* Our allocation in UPM for prefetching. */
++ struct drm_mm_node upm[DRM_FORMAT_MAX_PLANES];
++
++ /* The Unified Pre-Fetcher Handle */
++ unsigned int upm_handle[DRM_FORMAT_MAX_PLANES];
++
++ /* Number of lines to pre-fetch */
++ unsigned int upm_buffer_lines;
++
+ /* Set when the plane has per-pixel alpha content or does not cover
+ * the entire screen. This is a hint to the CRTC that it might need
+ * to enable background color fill.
+--- a/drivers/gpu/drm/vc4/vc4_hvs.c
++++ b/drivers/gpu/drm/vc4/vc4_hvs.c
+@@ -67,6 +67,80 @@ static const struct debugfs_reg32 vc4_hv
+ VC4_REG32(SCALER_OLEDCOEF2),
+ };
+
++static const struct debugfs_reg32 vc6_hvs_regs[] = {
++ VC4_REG32(SCALER6_VERSION),
++ VC4_REG32(SCALER6_CXM_SIZE),
++ VC4_REG32(SCALER6_LBM_SIZE),
++ VC4_REG32(SCALER6_UBM_SIZE),
++ VC4_REG32(SCALER6_COBA_SIZE),
++ VC4_REG32(SCALER6_COB_SIZE),
++ VC4_REG32(SCALER6_CONTROL),
++ VC4_REG32(SCALER6_FETCHER_STATUS),
++ VC4_REG32(SCALER6_FETCH_STATUS),
++ VC4_REG32(SCALER6_HANDLE_ERROR),
++ VC4_REG32(SCALER6_DISP0_CTRL0),
++ VC4_REG32(SCALER6_DISP0_CTRL1),
++ VC4_REG32(SCALER6_DISP0_BGND),
++ VC4_REG32(SCALER6_DISP0_LPTRS),
++ VC4_REG32(SCALER6_DISP0_COB),
++ VC4_REG32(SCALER6_DISP0_STATUS),
++ VC4_REG32(SCALER6_DISP0_DL),
++ VC4_REG32(SCALER6_DISP0_RUN),
++ VC4_REG32(SCALER6_DISP1_CTRL0),
++ VC4_REG32(SCALER6_DISP1_CTRL1),
++ VC4_REG32(SCALER6_DISP1_BGND),
++ VC4_REG32(SCALER6_DISP1_LPTRS),
++ VC4_REG32(SCALER6_DISP1_COB),
++ VC4_REG32(SCALER6_DISP1_STATUS),
++ VC4_REG32(SCALER6_DISP1_DL),
++ VC4_REG32(SCALER6_DISP1_RUN),
++ VC4_REG32(SCALER6_DISP2_CTRL0),
++ VC4_REG32(SCALER6_DISP2_CTRL1),
++ VC4_REG32(SCALER6_DISP2_BGND),
++ VC4_REG32(SCALER6_DISP2_LPTRS),
++ VC4_REG32(SCALER6_DISP2_COB),
++ VC4_REG32(SCALER6_DISP2_STATUS),
++ VC4_REG32(SCALER6_DISP2_DL),
++ VC4_REG32(SCALER6_DISP2_RUN),
++ VC4_REG32(SCALER6_EOLN),
++ VC4_REG32(SCALER6_DL_STATUS),
++ VC4_REG32(SCALER6_BFG_MISC),
++ VC4_REG32(SCALER6_QOS0),
++ VC4_REG32(SCALER6_PROF0),
++ VC4_REG32(SCALER6_QOS1),
++ VC4_REG32(SCALER6_PROF1),
++ VC4_REG32(SCALER6_QOS2),
++ VC4_REG32(SCALER6_PROF2),
++ VC4_REG32(SCALER6_PRI_MAP0),
++ VC4_REG32(SCALER6_PRI_MAP1),
++ VC4_REG32(SCALER6_HISTCTRL),
++ VC4_REG32(SCALER6_HISTBIN0),
++ VC4_REG32(SCALER6_HISTBIN1),
++ VC4_REG32(SCALER6_HISTBIN2),
++ VC4_REG32(SCALER6_HISTBIN3),
++ VC4_REG32(SCALER6_HISTBIN4),
++ VC4_REG32(SCALER6_HISTBIN5),
++ VC4_REG32(SCALER6_HISTBIN6),
++ VC4_REG32(SCALER6_HISTBIN7),
++ VC4_REG32(SCALER6_HDR_CFG_REMAP),
++ VC4_REG32(SCALER6_COL_SPACE),
++ VC4_REG32(SCALER6_HVS_ID),
++ VC4_REG32(SCALER6_CFC1),
++ VC4_REG32(SCALER6_DISP_UPM_ISO0),
++ VC4_REG32(SCALER6_DISP_UPM_ISO1),
++ VC4_REG32(SCALER6_DISP_UPM_ISO2),
++ VC4_REG32(SCALER6_DISP_LBM_ISO0),
++ VC4_REG32(SCALER6_DISP_LBM_ISO1),
++ VC4_REG32(SCALER6_DISP_LBM_ISO2),
++ VC4_REG32(SCALER6_DISP_COB_ISO0),
++ VC4_REG32(SCALER6_DISP_COB_ISO1),
++ VC4_REG32(SCALER6_DISP_COB_ISO2),
++ VC4_REG32(SCALER6_BAD_COB),
++ VC4_REG32(SCALER6_BAD_LBM),
++ VC4_REG32(SCALER6_BAD_UPM),
++ VC4_REG32(SCALER6_BAD_AXI),
++};
++
+ void vc4_hvs_dump_state(struct vc4_hvs *hvs)
+ {
+ struct drm_device *drm = &hvs->vc4->base;
+@@ -145,6 +219,55 @@ static int vc4_hvs_debugfs_dlist(struct
+ return 0;
+ }
+
++static int vc6_hvs_debugfs_dlist(struct seq_file *m, void *data)
++{
++ struct drm_info_node *node = m->private;
++ struct drm_device *dev = node->minor->dev;
++ struct vc4_dev *vc4 = to_vc4_dev(dev);
++ struct vc4_hvs *hvs = vc4->hvs;
++ struct drm_printer p = drm_seq_file_printer(m);
++ unsigned int dlist_mem_size = hvs->dlist_mem_size;
++ unsigned int next_entry_start;
++ unsigned int i;
++
++ for (i = 0; i < SCALER_CHANNELS_COUNT; i++) {
++ unsigned int active_dlist, dispstat;
++ unsigned int j;
++
++ dispstat = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(i)),
++ SCALER6_DISPX_STATUS_MODE);
++ if (dispstat == SCALER6_DISPX_STATUS_MODE_DISABLED ||
++ dispstat == SCALER6_DISPX_STATUS_MODE_EOF) {
++ drm_printf(&p, "HVS chan %u disabled\n", i);
++ continue;
++ }
++
++ drm_printf(&p, "HVS chan %u:\n", i);
++
++ active_dlist = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_DL(i)),
++ SCALER6_DISPX_DL_LACT);
++ next_entry_start = 0;
++ /* Dump words from the active list until an entry header has the END flag. */
++ for (j = active_dlist; j < dlist_mem_size; j++) {
++ u32 dlist_word;
++
++ dlist_word = readl((u32 __iomem *)vc4->hvs->dlist + j);
++ drm_printf(&p, "dlist: %02u: 0x%08x\n", j,
++ dlist_word);
++ if (!next_entry_start ||
++ next_entry_start == j) {
++ if (dlist_word & SCALER_CTL0_END)
++ break;
++ next_entry_start = j +
++ VC4_GET_FIELD(dlist_word,
++ SCALER_CTL0_SIZE);
++ }
++ }
++ }
++
++ return 0;
++}
++
+ static int vc5_hvs_debugfs_gamma(struct seq_file *m, void *data)
+ {
+ struct drm_info_node *node = m->private;
+@@ -435,6 +558,10 @@ static void vc4_hvs_irq_enable_eof(struc
+ SCALER5_DISPCTRL_DSPEIEOF(channel));
+ break;
+
++ case VC4_GEN_6:
++ enable_irq(hvs->eof_irq[channel].desc);
++ break;
++
+ default:
+ break;
+ }
+@@ -463,6 +590,10 @@ static void vc4_hvs_irq_clear_eof(struct
+ ~SCALER5_DISPCTRL_DSPEIEOF(channel));
+ break;
+
++ case VC4_GEN_6:
++ disable_irq_nosync(hvs->eof_irq[channel].desc);
++ break;
++
+ default:
+ break;
+ }
+@@ -622,26 +753,32 @@ static void vc4_hvs_dlist_free_work(stru
+
+ u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo)
+ {
+- struct drm_device *drm = &hvs->vc4->base;
++ struct vc4_dev *vc4 = hvs->vc4;
++ struct drm_device *drm = &vc4->base;
+ u8 field = 0;
+ int idx;
+
+ if (!drm_dev_enter(drm, &idx))
+ return 0;
+
+- switch (fifo) {
+- case 0:
+- field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
+- SCALER_DISPSTAT1_FRCNT0);
+- break;
+- case 1:
+- field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
+- SCALER_DISPSTAT1_FRCNT1);
+- break;
+- case 2:
+- field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2),
+- SCALER_DISPSTAT2_FRCNT2);
+- break;
++ if (vc4->gen >= VC4_GEN_6) {
++ field = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(fifo)),
++ SCALER6_DISPX_STATUS_FRCNT);
++ } else {
++ switch (fifo) {
++ case 0:
++ field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
++ SCALER_DISPSTAT1_FRCNT0);
++ break;
++ case 1:
++ field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
++ SCALER_DISPSTAT1_FRCNT1);
++ break;
++ case 2:
++ field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2),
++ SCALER_DISPSTAT2_FRCNT2);
++ break;
++ }
+ }
+
+ drm_dev_exit(idx);
+@@ -708,6 +845,23 @@ int vc4_hvs_get_fifo_from_output(struct
+ default:
+ return -EPIPE;
+ }
++
++ case VC4_GEN_6:
++ switch (output) {
++ case 0:
++ return 0;
++
++ case 2:
++ return 2;
++
++ case 1:
++ case 3:
++ case 4:
++ return 1;
++
++ default:
++ return -EPIPE;
++ }
+ }
+
+ return -EPIPE;
+@@ -782,7 +936,41 @@ static int vc4_hvs_init_channel(struct v
+ return 0;
+ }
+
+-void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
++static int vc6_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc,
++ struct drm_display_mode *mode, bool oneshot)
++{
++ struct vc4_dev *vc4 = hvs->vc4;
++ struct drm_device *drm = &vc4->base;
++ struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state);
++ unsigned int chan = vc4_crtc_state->assigned_channel;
++ bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE;
++ u32 disp_ctrl1;
++ int idx;
++ /* Program the CRTC's assigned HVS channel for @mode and enable it. */
++ if (!drm_dev_enter(drm, &idx))
++ return -ENODEV;
++
++ HVS_WRITE(SCALER6_DISPX_CTRL0(chan), SCALER6_DISPX_CTRL0_RESET);
++ /* Update only the interlace bit of CTRL1; the other bits are kept. */
++ disp_ctrl1 = HVS_READ(SCALER6_DISPX_CTRL1(chan));
++ disp_ctrl1 &= ~SCALER6_DISPX_CTRL1_INTLACE;
++ HVS_WRITE(SCALER6_DISPX_CTRL1(chan),
++ disp_ctrl1 | (interlace ? SCALER6_DISPX_CTRL1_INTLACE : 0));
++ /* FWIDTH and LINES are written as the mode's size minus one. */
++ HVS_WRITE(SCALER6_DISPX_CTRL0(chan),
++ SCALER6_DISPX_CTRL0_ENB |
++ VC4_SET_FIELD(mode->hdisplay - 1,
++ SCALER6_DISPX_CTRL0_FWIDTH) |
++ (oneshot ? SCALER6_DISPX_CTRL0_ONESHOT : 0) |
++ VC4_SET_FIELD(mode->vdisplay - 1,
++ SCALER6_DISPX_CTRL0_LINES));
++
++ drm_dev_exit(idx);
++
++ return 0;
++}
++
++static void __vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
+ {
+ struct drm_device *drm = &hvs->vc4->base;
+ int idx;
+@@ -813,6 +1001,42 @@ out:
+ drm_dev_exit(idx);
+ }
+
++static void __vc6_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
++{
++ struct vc4_dev *vc4 = hvs->vc4;
++ struct drm_device *drm = &vc4->base;
++ int idx;
++
++ if (!drm_dev_enter(drm, &idx))
++ return;
++ /* A channel that is not enabled has nothing to stop. */
++ if (!(HVS_READ(SCALER6_DISPX_CTRL0(chan)) & SCALER6_DISPX_CTRL0_ENB))
++ goto out;
++ /* Set the reset bit, then clear the enable bit. */
++ HVS_WRITE(SCALER6_DISPX_CTRL0(chan),
++ HVS_READ(SCALER6_DISPX_CTRL0(chan)) | SCALER6_DISPX_CTRL0_RESET);
++
++ HVS_WRITE(SCALER6_DISPX_CTRL0(chan),
++ HVS_READ(SCALER6_DISPX_CTRL0(chan)) & ~SCALER6_DISPX_CTRL0_ENB);
++
++ WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(chan)),
++ SCALER6_DISPX_STATUS_MODE) !=
++ SCALER6_DISPX_STATUS_MODE_DISABLED);
++
++out:
++ drm_dev_exit(idx);
++}
++
++void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
++{
++ struct vc4_dev *vc4 = hvs->vc4;
++ /* GEN_6 and later use the SCALER6 registers; older parts the legacy ones. */
++ if (vc4->gen >= VC4_GEN_6)
++ __vc6_hvs_stop_channel(hvs, chan);
++ else
++ __vc4_hvs_stop_channel(hvs, chan);
++}
++
+ static int vc4_hvs_gamma_check(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+ {
+@@ -907,8 +1131,14 @@ static void vc4_hvs_install_dlist(struct
+ return;
+
+ WARN_ON(!vc4_state->mm);
+- HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel),
+- vc4_state->mm->mm_node.start);
++
++ if (vc4->gen >= VC4_GEN_6)
++ HVS_WRITE(SCALER6_DISPX_LPTRS(vc4_state->assigned_channel),
++ VC4_SET_FIELD(vc4_state->mm->mm_node.start,
++ SCALER6_DISPX_LPTRS_HEADE));
++ else
++ HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel),
++ vc4_state->mm->mm_node.start);
+
+ drm_dev_exit(idx);
+ }
+@@ -965,7 +1195,11 @@ void vc4_hvs_atomic_enable(struct drm_cr
+
+ vc4_hvs_install_dlist(crtc);
+ vc4_hvs_update_dlist(crtc);
+- vc4_hvs_init_channel(vc4->hvs, crtc, mode, oneshot);
++
++ if (vc4->gen >= VC4_GEN_6)
++ vc6_hvs_init_channel(vc4->hvs, crtc, mode, oneshot);
++ else
++ vc4_hvs_init_channel(vc4->hvs, crtc, mode, oneshot);
+ }
+
+ void vc4_hvs_atomic_disable(struct drm_crtc *crtc,
+@@ -1052,13 +1286,28 @@ void vc4_hvs_atomic_flush(struct drm_crt
+ WARN_ON(!vc4_state->mm);
+ WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm->mm_node.size);
+
+- if (enable_bg_fill)
++ if (enable_bg_fill) {
+ /* This sets a black background color fill, as is the case
+ * with other DRM drivers.
+ */
+- HVS_WRITE(SCALER_DISPBKGNDX(channel),
+- HVS_READ(SCALER_DISPBKGNDX(channel)) |
+- SCALER_DISPBKGND_FILL);
++ if (vc4->gen >= VC4_GEN_6)
++ HVS_WRITE(SCALER6_DISPX_CTRL1(channel),
++ HVS_READ(SCALER6_DISPX_CTRL1(channel)) |
++ SCALER6_DISPX_CTRL1_BGENB);
++ else
++ HVS_WRITE(SCALER_DISPBKGNDX(channel),
++ HVS_READ(SCALER_DISPBKGNDX(channel)) |
++ SCALER_DISPBKGND_FILL);
++ } else {
++ if (vc4->gen >= VC4_GEN_6)
++ HVS_WRITE(SCALER6_DISPX_CTRL1(channel),
++ HVS_READ(SCALER6_DISPX_CTRL1(channel)) &
++ ~SCALER6_DISPX_CTRL1_BGENB);
++ else
++ HVS_WRITE(SCALER_DISPBKGNDX(channel),
++ HVS_READ(SCALER_DISPBKGNDX(channel)) &
++ ~SCALER_DISPBKGND_FILL);
++ }
+
+ /* Only update DISPLIST if the CRTC was already running and is not
+ * being disabled.
+@@ -1210,6 +1459,27 @@ static irqreturn_t vc4_hvs_irq_handler(i
+ return irqret;
+ }
+
++static irqreturn_t vc6_hvs_eof_irq_handler(int irq, void *data)
++{
++ struct drm_device *dev = data;
++ struct vc4_dev *vc4 = to_vc4_dev(dev);
++ struct vc4_hvs *hvs = vc4->hvs;
++ unsigned int i;
++ /* Schedule a dlist sweep for the channel whose enabled EOF interrupt is @irq. */
++ for (i = 0; i < HVS_NUM_CHANNELS; i++) {
++ if (!hvs->eof_irq[i].enabled)
++ continue;
++
++ if (hvs->eof_irq[i].desc != irq)
++ continue;
++
++ vc4_hvs_schedule_dlist_sweep(hvs, i);
++ return IRQ_HANDLED;
++ }
++
++ return IRQ_NONE;
++}
++
+ int vc4_hvs_debugfs_init(struct drm_minor *minor)
+ {
+ struct drm_device *drm = minor->dev;
+@@ -1232,8 +1502,10 @@ int vc4_hvs_debugfs_init(struct drm_mino
+ NULL);
+ }
+
+- ret = vc4_debugfs_add_file(minor, "hvs_dlists",
+- vc4_hvs_debugfs_dlist, NULL);
++ if (vc4->gen >= VC4_GEN_6)
++ ret = vc4_debugfs_add_file(minor, "hvs_dlists", vc6_hvs_debugfs_dlist, NULL);
++ else
++ ret = vc4_debugfs_add_file(minor, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL);
+ if (ret)
+ return ret;
+
+@@ -1256,6 +1528,9 @@ struct vc4_hvs *__vc4_hvs_alloc(struct v
+ {
+ struct drm_device *drm = &vc4->base;
+ struct vc4_hvs *hvs;
++ unsigned int dlist_start;
++ size_t dlist_size;
++ size_t lbm_size;
+
+ hvs = drmm_kzalloc(drm, sizeof(*hvs), GFP_KERNEL);
+ if (!hvs)
+@@ -1270,14 +1545,39 @@ struct vc4_hvs *__vc4_hvs_alloc(struct v
+ INIT_LIST_HEAD(&hvs->stale_dlist_entries);
+ INIT_WORK(&hvs->free_dlist_work, vc4_hvs_dlist_free_work);
+
+- /* Set up the HVS display list memory manager. We never
+- * overwrite the setup from the bootloader (just 128b out of
+- * our 16K), since we don't want to scramble the screen when
+- * transitioning from the firmware's boot setup to runtime.
+- */
+- drm_mm_init(&hvs->dlist_mm,
+- HVS_BOOTLOADER_DLIST_END,
+- (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END);
++ switch (vc4->gen) {
++ case VC4_GEN_4:
++ case VC4_GEN_5:
++ /* Set up the HVS display list memory manager. We never
++ * overwrite the setup from the bootloader (just 128b
++ * out of our 16K), since we don't want to scramble the
++ * screen when transitioning from the firmware's boot
++ * setup to runtime.
++ */
++ dlist_start = HVS_BOOTLOADER_DLIST_END;
++ dlist_size = (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END;
++ break;
++
++ case VC4_GEN_6:
++ dlist_start = HVS_BOOTLOADER_DLIST_END;
++
++ /*
++ * If we are running a test, it means that we can't
++ * access a register. Use a plausible size then.
++ */
++ if (!kunit_get_current_test())
++ dlist_size = HVS_READ(SCALER6_CXM_SIZE);
++ else
++ dlist_size = 4096;
++
++ break;
++
++ default:
++ drm_err(drm, "Unknown VC4 generation: %d", vc4->gen);
++ return ERR_PTR(-ENODEV);
++ }
++
++ drm_mm_init(&hvs->dlist_mm, dlist_start, dlist_size);
+
+ hvs->dlist_mem_size = dlist_size;
+
+@@ -1286,12 +1586,46 @@ struct vc4_hvs *__vc4_hvs_alloc(struct v
+ * between planes when they don't overlap on the screen, but
+ * for now we just allocate globally.
+ */
+- if (vc4->gen == VC4_GEN_4)
++
++ switch (vc4->gen) {
++ case VC4_GEN_4:
+ /* 48k words of 2x12-bit pixels */
+- drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024);
+- else
++ lbm_size = 48 * SZ_1K;
++ break;
++
++ case VC4_GEN_5:
+ /* 60k words of 4x12-bit pixels */
+- drm_mm_init(&hvs->lbm_mm, 0, 60 * 1024);
++ lbm_size = 60 * SZ_1K;
++ break;
++
++ case VC4_GEN_6:
++ /*
++ * If we are running a test, it means that we can't
++ * access a register. Use a plausible size then.
++ */
++ lbm_size = 1024;
++ break;
++
++ default:
++ drm_err(drm, "Unknown VC4 generation: %d", vc4->gen);
++ return ERR_PTR(-ENODEV);
++ }
++
++ drm_mm_init(&hvs->lbm_mm, 0, lbm_size);
++
++ if (vc4->gen >= VC4_GEN_6) {
++ ida_init(&hvs->upm_handles);
++
++ /*
++ * NOTE: On BCM2712, the size can also be read through
++ * the SCALER_UBM_SIZE register. We would need to do a
++ * register access though, which we can't do with kunit
++ * that also uses this function to create its mock
++ * device.
++ */
++ drm_mm_init(&hvs->upm_mm, 0, 1024 * HVS_UBM_WORD_SIZE);
++ }
++
+
+ vc4->hvs = hvs;
+
+@@ -1388,10 +1722,124 @@ static int vc4_hvs_hw_init(struct vc4_hv
+ return 0;
+ }
+
++#define CFC1_N_NL_CSC_CTRL(x) (0xa000 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C00(x) (0xa008 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C01(x) (0xa00c + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C02(x) (0xa010 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C03(x) (0xa014 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C04(x) (0xa018 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C10(x) (0xa01c + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C11(x) (0xa020 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C12(x) (0xa024 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C13(x) (0xa028 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C14(x) (0xa02c + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C20(x) (0xa030 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C21(x) (0xa034 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C22(x) (0xa038 + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C23(x) (0xa03c + ((x) * 0x3000))
++#define CFC1_N_MA_CSC_COEFF_C24(x) (0xa040 + ((x) * 0x3000))
++
++/* 4 S2.22 multiplication factors, and 1 S9.15 additive element for each of 3
++ * output components
++ */
++struct vc6_csc_coeff_entry {
++ u32 csc[3][5];
++};
++
++static const struct vc6_csc_coeff_entry csc_coeffs[2][3] = {
++ [DRM_COLOR_YCBCR_LIMITED_RANGE] = {
++ [DRM_COLOR_YCBCR_BT601] = {
++ .csc = {
++ { 0x004A8542, 0x0, 0x0066254A, 0x0, 0xFF908A0D },
++ { 0x004A8542, 0xFFE6ED5D, 0xFFCBF856, 0x0, 0x0043C9A3 },
++ { 0x004A8542, 0x00811A54, 0x0, 0x0, 0xFF759502 }
++ }
++ },
++ [DRM_COLOR_YCBCR_BT709] = {
++ .csc = {
++ { 0x004A8542, 0x0, 0x0072BC44, 0x0, 0xFF83F312 },
++ { 0x004A8542, 0xFFF25A22, 0xFFDDE4D0, 0x0, 0x00267064 },
++ { 0x004A8542, 0x00873197, 0x0, 0x0, 0xFF6F7DC0 }
++ }
++ },
++ [DRM_COLOR_YCBCR_BT2020] = {
++ .csc = {
++ { 0x004A8542, 0x0, 0x006B4A17, 0x0, 0xFF8B653F },
++ { 0x004A8542, 0xFFF402D9, 0xFFDDE4D0, 0x0, 0x0024C7AE },
++ { 0x004A8542, 0x008912CC, 0x0, 0x0, 0xFF6D9C8B }
++ }
++ }
++ },
++ [DRM_COLOR_YCBCR_FULL_RANGE] = {
++ [DRM_COLOR_YCBCR_BT601] = {
++ .csc = {
++ { 0x00400000, 0x0, 0x0059BA5E, 0x0, 0xFFA645A1 },
++ { 0x00400000, 0xFFE9F9AC, 0xFFD24B97, 0x0, 0x0043BABB },
++ { 0x00400000, 0x00716872, 0x0, 0x0, 0xFF8E978D }
++ }
++ },
++ [DRM_COLOR_YCBCR_BT709] = {
++ .csc = {
++ { 0x00400000, 0x0, 0x0064C985, 0x0, 0xFF9B367A },
++ { 0x00400000, 0xFFF402E1, 0xFFE20A40, 0x0, 0x0029F2DE },
++ { 0x00400000, 0x0076C226, 0x0, 0x0, 0xFF893DD9 }
++ }
++ },
++ [DRM_COLOR_YCBCR_BT2020] = {
++ .csc = {
++ { 0x00400000, 0x0, 0x005E3F14, 0x0, 0xFFA1C0EB },
++ { 0x00400000, 0xFFF577F6, 0xFFDB580F, 0x0, 0x002F2FFA },
++ { 0x00400000, 0x007868DB, 0x0, 0x0, 0xFF879724 }
++ }
++ }
++ }
++};
++
++static int vc6_hvs_hw_init(struct vc4_hvs *hvs)
++{
++ const struct vc6_csc_coeff_entry *coeffs;
++ unsigned int i;
++
++ HVS_WRITE(SCALER6_CONTROL,
++ SCALER6_CONTROL_HVS_EN |
++ VC4_SET_FIELD(8, SCALER6_CONTROL_PF_LINES) |
++ VC4_SET_FIELD(15, SCALER6_CONTROL_MAX_REQS));
++
++ /* Set HVS arbiter priority to max */
++ HVS_WRITE(SCALER6_PRI_MAP0, 0xffffffff);
++ HVS_WRITE(SCALER6_PRI_MAP1, 0xffffffff);
++ /* Load all six csc_coeffs sets: i / 3 picks the range, i % 3 the encoding. */
++ for (i = 0; i < 6; i++) {
++ coeffs = &csc_coeffs[i / 3][i % 3];
++
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C00(i), coeffs->csc[0][0]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C01(i), coeffs->csc[0][1]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C02(i), coeffs->csc[0][2]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C03(i), coeffs->csc[0][3]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C04(i), coeffs->csc[0][4]);
++
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C10(i), coeffs->csc[1][0]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C11(i), coeffs->csc[1][1]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C12(i), coeffs->csc[1][2]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C13(i), coeffs->csc[1][3]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C14(i), coeffs->csc[1][4]);
++
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C20(i), coeffs->csc[2][0]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C21(i), coeffs->csc[2][1]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C22(i), coeffs->csc[2][2]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C23(i), coeffs->csc[2][3]);
++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C24(i), coeffs->csc[2][4]);
++ /* NOTE(review): BIT(15) presumably enables this CSC set - confirm. */
++ HVS_WRITE(CFC1_N_NL_CSC_CTRL(i), BIT(15));
++ }
++
++ return 0;
++}
++
+ static int vc4_hvs_cob_init(struct vc4_hvs *hvs)
+ {
+ struct vc4_dev *vc4 = hvs->vc4;
+- u32 reg, top;
++ u32 reg, top, base;
+
+ /*
+ * Recompute Composite Output Buffer (COB) allocations for the
+@@ -1452,6 +1900,31 @@ static int vc4_hvs_cob_init(struct vc4_h
+ HVS_WRITE(SCALER_DISPBASE0, reg);
+ break;
+
++ case VC4_GEN_6:
++ #define VC6_COB_LINE_WIDTH 3840
++ #define VC6_COB_NUM_LINES 4
++ reg = 0;
++ top = 3840;
++ base = 0; /* DISP2 takes the first COB range, starting at 0 */
++ HVS_WRITE(SCALER6_DISP2_COB,
++ VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) |
++ VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE));
++
++ base = top + 16;
++ top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES;
++
++ HVS_WRITE(SCALER6_DISP1_COB,
++ VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) |
++ VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE));
++
++ base = top + 16;
++ top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES;
++
++ HVS_WRITE(SCALER6_DISP0_COB,
++ VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) |
++ VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE));
++ break;
++
+ default:
+ return -EINVAL;
+ }
+@@ -1477,10 +1950,16 @@ static int vc4_hvs_bind(struct device *d
+ return PTR_ERR(hvs);
+
+ hvs->regset.base = hvs->regs;
+- hvs->regset.regs = vc4_hvs_regs;
+- hvs->regset.nregs = ARRAY_SIZE(vc4_hvs_regs);
+
+- if (vc4->gen == VC4_GEN_5) {
++ if (vc4->gen >= VC4_GEN_6) {
++ hvs->regset.regs = vc6_hvs_regs;
++ hvs->regset.nregs = ARRAY_SIZE(vc6_hvs_regs);
++ } else {
++ hvs->regset.regs = vc4_hvs_regs;
++ hvs->regset.nregs = ARRAY_SIZE(vc4_hvs_regs);
++ }
++
++ if (vc4->gen >= VC4_GEN_5) {
+ struct rpi_firmware *firmware;
+ struct device_node *node;
+ unsigned int max_rate;
+@@ -1494,12 +1973,20 @@ static int vc4_hvs_bind(struct device *d
+ if (!firmware)
+ return -EPROBE_DEFER;
+
+- hvs->core_clk = devm_clk_get(&pdev->dev, NULL);
++ hvs->core_clk = devm_clk_get(&pdev->dev,
++ (vc4->gen >= VC4_GEN_6) ? "core" : NULL);
+ if (IS_ERR(hvs->core_clk)) {
+ dev_err(&pdev->dev, "Couldn't get core clock\n");
+ return PTR_ERR(hvs->core_clk);
+ }
+
++ hvs->disp_clk = devm_clk_get(&pdev->dev,
++ (vc4->gen >= VC4_GEN_6) ? "disp" : NULL);
++ if (IS_ERR(hvs->disp_clk)) {
++ dev_err(&pdev->dev, "Couldn't get disp clock\n");
++ return PTR_ERR(hvs->disp_clk);
++ }
++
+ max_rate = rpi_firmware_clk_get_max_rate(firmware,
+ RPI_FIRMWARE_CORE_CLK_ID);
+ rpi_firmware_put(firmware);
+@@ -1516,14 +2003,51 @@ static int vc4_hvs_bind(struct device *d
+ dev_err(&pdev->dev, "Couldn't enable the core clock\n");
+ return ret;
+ }
++
++ ret = clk_prepare_enable(hvs->disp_clk);
++ if (ret) {
++ dev_err(&pdev->dev, "Couldn't enable the disp clock\n");
++ return ret;
++ }
+ }
+
+- if (vc4->gen == VC4_GEN_4)
+- hvs->dlist = hvs->regs + SCALER_DLIST_START;
+- else
++ if (vc4->gen >= VC4_GEN_6) {
++ unsigned int i;
++
++ for (i = 0; i < HVS_NUM_CHANNELS; i++) {
++ char irq_name[16];
++ int irq;
++
++ snprintf(irq_name, sizeof(irq_name), "ch%u-eof", i);
++
++ irq = platform_get_irq_byname(pdev, irq_name);
++ if (irq < 0) {
++ dev_err(&pdev->dev,
++ "Couldn't get %s interrupt: %d\n",
++ irq_name, irq);
++ return irq;
++ }
++
++ ret = devm_request_irq(&pdev->dev,
++ irq,
++ vc6_hvs_eof_irq_handler,
++ IRQF_NO_AUTOEN,
++ dev_name(&pdev->dev),
++ drm);
++
++ hvs->eof_irq[i].desc = irq;
++ }
++ }
++
++ if (vc4->gen >= VC4_GEN_5)
+ hvs->dlist = hvs->regs + SCALER5_DLIST_START;
++ else
++ hvs->dlist = hvs->regs + SCALER_DLIST_START;
+
+- ret = vc4_hvs_hw_init(hvs);
++ if (vc4->gen >= VC4_GEN_6)
++ ret = vc6_hvs_hw_init(hvs);
++ else
++ ret = vc4_hvs_hw_init(hvs);
+ if (ret)
+ return ret;
+
+@@ -1540,10 +2064,12 @@ static int vc4_hvs_bind(struct device *d
+ if (ret)
+ return ret;
+
+- ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
+- vc4_hvs_irq_handler, 0, "vc4 hvs", drm);
+- if (ret)
+- return ret;
++ if (vc4->gen < VC4_GEN_6) {
++ ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
++ vc4_hvs_irq_handler, 0, "vc4 hvs", drm);
++ if (ret)
++ return ret;
++ }
+
+ return 0;
+ }
+@@ -1568,6 +2094,7 @@ static void vc4_hvs_unbind(struct device
+ drm_mm_remove_node(node);
+ drm_mm_takedown(&vc4->hvs->lbm_mm);
+
++ clk_disable_unprepare(hvs->disp_clk);
+ clk_disable_unprepare(hvs->core_clk);
+
+ vc4->hvs = NULL;
+@@ -1591,6 +2118,7 @@ static int vc4_hvs_dev_remove(struct pla
+
+ static const struct of_device_id vc4_hvs_dt_match[] = {
+ { .compatible = "brcm,bcm2711-hvs" },
++ { .compatible = "brcm,bcm2712-hvs" },
+ { .compatible = "brcm,bcm2835-hvs" },
+ {}
+ };
+--- a/drivers/gpu/drm/vc4/vc4_kms.c
++++ b/drivers/gpu/drm/vc4/vc4_kms.c
+@@ -329,17 +329,59 @@ static void vc5_hvs_pv_muxing_commit(str
+ }
+ }
+
++static void vc6_hvs_pv_muxing_commit(struct vc4_dev *vc4,
++ struct drm_atomic_state *state)
++{
++ struct vc4_hvs *hvs = vc4->hvs;
++ struct drm_crtc_state *crtc_state;
++ struct drm_crtc *crtc;
++ unsigned int i;
++
++ WARN_ON_ONCE(vc4->gen != VC4_GEN_6);
++
++ for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
++ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
++ struct vc4_encoder *vc4_encoder;
++ struct drm_encoder *encoder;
++ unsigned char mux;
++ u32 reg;
++
++ if (!vc4_state->update_muxing)
++ continue;
++ /* Only channel 1's output (DSP1_TARGET) is muxed here. */
++ if (vc4_state->assigned_channel != 1)
++ continue;
++
++ encoder = vc4_get_crtc_encoder(crtc, crtc_state);
++ vc4_encoder = to_vc4_encoder(encoder);
++ switch (vc4_encoder->type) {
++ case VC4_ENCODER_TYPE_HDMI1:
++ mux = 0;
++ break;
++
++ case VC4_ENCODER_TYPE_TXP:
++ mux = 2;
++ break;
++ /* No target is defined for other encoders: leave the mux untouched. */
++ default:
++ continue;
++ }
++
++ reg = HVS_READ(SCALER6_CONTROL);
++ HVS_WRITE(SCALER6_CONTROL,
++ (reg & ~SCALER6_CONTROL_DSP1_TARGET_MASK) |
++ VC4_SET_FIELD(mux, SCALER6_CONTROL_DSP1_TARGET));
++ }
++}
++
+ static void vc4_atomic_commit_tail(struct drm_atomic_state *state)
+ {
+ struct drm_device *dev = state->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_hvs *hvs = vc4->hvs;
+- struct drm_crtc_state *new_crtc_state;
+ struct vc4_hvs_state *new_hvs_state;
+- struct drm_crtc *crtc;
+ struct vc4_hvs_state *old_hvs_state;
+ unsigned int channel;
+- int i;
+
+ old_hvs_state = vc4_hvs_get_old_global_state(state);
+ if (WARN_ON(IS_ERR(old_hvs_state)))
+@@ -349,14 +391,23 @@ static void vc4_atomic_commit_tail(struc
+ if (WARN_ON(IS_ERR(new_hvs_state)))
+ return;
+
+- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+- struct vc4_crtc_state *vc4_crtc_state;
++ if (vc4->gen < VC4_GEN_6) {
++ struct drm_crtc_state *new_crtc_state;
++ struct drm_crtc *crtc;
++ int i;
++
++ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
++ struct vc4_crtc_state *vc4_crtc_state;
+
+- if (!new_crtc_state->commit || vc4->firmware_kms)
+- continue;
++ if (vc4->firmware_kms)
++ continue;
+
+- vc4_crtc_state = to_vc4_crtc_state(new_crtc_state);
+- vc4_hvs_mask_underrun(hvs, vc4_crtc_state->assigned_channel);
++ if (!new_crtc_state->commit)
++ continue;
++
++ vc4_crtc_state = to_vc4_crtc_state(new_crtc_state);
++ vc4_hvs_mask_underrun(hvs, vc4_crtc_state->assigned_channel);
++ }
+ }
+
+ for (channel = 0; channel < HVS_NUM_CHANNELS; channel++) {
+@@ -378,7 +429,7 @@ static void vc4_atomic_commit_tail(struc
+ old_hvs_state->fifo_state[channel].pending_commit = NULL;
+ }
+
+- if (vc4->gen == VC4_GEN_5 && !vc4->firmware_kms) {
++ if (vc4->gen >= VC4_GEN_5 && !vc4->firmware_kms) {
+ unsigned long state_rate = max(old_hvs_state->core_clock_rate,
+ new_hvs_state->core_clock_rate);
+ unsigned long core_rate = clamp_t(unsigned long, state_rate,
+@@ -391,17 +442,32 @@ static void vc4_atomic_commit_tail(struc
+ * modeset.
+ */
+ WARN_ON(clk_set_min_rate(hvs->core_clk, core_rate));
++ WARN_ON(clk_set_min_rate(hvs->disp_clk, core_rate));
+ }
+
+ drm_atomic_helper_commit_modeset_disables(dev, state);
+
+- vc4_ctm_commit(vc4, state);
++ if (vc4->gen <= VC4_GEN_5)
++ vc4_ctm_commit(vc4, state);
+
+ if (!vc4->firmware_kms) {
+- if (vc4->gen == VC4_GEN_5)
+- vc5_hvs_pv_muxing_commit(vc4, state);
+- else
++ switch (vc4->gen) {
++ case VC4_GEN_4:
+ vc4_hvs_pv_muxing_commit(vc4, state);
++ break;
++
++ case VC4_GEN_5:
++ vc5_hvs_pv_muxing_commit(vc4, state);
++ break;
++
++ case VC4_GEN_6:
++ vc6_hvs_pv_muxing_commit(vc4, state);
++ break;
++
++ default:
++ drm_err(dev, "Unknown VC4 generation: %d\n", vc4->gen);
++ break;
++ }
+ }
+
+ drm_atomic_helper_commit_planes(dev, state,
+@@ -417,7 +483,7 @@ static void vc4_atomic_commit_tail(struc
+
+ drm_atomic_helper_cleanup_planes(dev, state);
+
+- if (vc4->gen == VC4_GEN_5 && !vc4->firmware_kms) {
++ if (vc4->gen >= VC4_GEN_5 && !vc4->firmware_kms) {
+ unsigned long core_rate = min_t(unsigned long,
+ hvs->max_core_rate,
+ new_hvs_state->core_clock_rate);
+@@ -429,6 +495,7 @@ static void vc4_atomic_commit_tail(struc
+ * requirements.
+ */
+ WARN_ON(clk_set_min_rate(hvs->core_clk, core_rate));
++ WARN_ON(clk_set_min_rate(hvs->disp_clk, core_rate));
+
+ drm_dbg(dev, "Core clock actual rate: %lu Hz\n",
+ clk_get_rate(hvs->core_clk));
+@@ -1081,7 +1148,10 @@ int vc4_kms_load(struct drm_device *dev)
+ return ret;
+ }
+
+- if (vc4->gen == VC4_GEN_5) {
++ if (vc4->gen >= VC4_GEN_6) {
++ dev->mode_config.max_width = 8192;
++ dev->mode_config.max_height = 8192;
++ } else if (vc4->gen >= VC4_GEN_5) {
+ dev->mode_config.max_width = 7680;
+ dev->mode_config.max_height = 7680;
+ } else {
+--- a/drivers/gpu/drm/vc4/vc4_plane.c
++++ b/drivers/gpu/drm/vc4/vc4_plane.c
+@@ -279,6 +279,7 @@ static bool plane_enabled(struct drm_pla
+ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
+ {
+ struct vc4_plane_state *vc4_state;
++ unsigned int i;
+
+ if (WARN_ON(!plane->state))
+ return NULL;
+@@ -288,6 +289,11 @@ static struct drm_plane_state *vc4_plane
+ return NULL;
+
+ memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
++ memset(&vc4_state->upm, 0, sizeof(vc4_state->upm));
++
++ for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++)
++ vc4_state->upm_handle[i] = 0;
++
+ vc4_state->dlist_initialized = 0;
+
+ __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
+@@ -310,14 +316,30 @@ static void vc4_plane_destroy_state(stru
+ struct drm_plane_state *state)
+ {
+ struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
++ struct vc4_hvs *hvs = vc4->hvs;
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++ unsigned int i;
+
+ if (drm_mm_node_allocated(&vc4_state->lbm)) {
+ unsigned long irqflags;
+
+- spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
++ spin_lock_irqsave(&hvs->mm_lock, irqflags);
+ drm_mm_remove_node(&vc4_state->lbm);
+- spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
++ spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
++ }
++
++ for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
++ unsigned long irqflags;
++
++ if (!drm_mm_node_allocated(&vc4_state->upm[i]))
++ continue;
++
++ spin_lock_irqsave(&hvs->mm_lock, irqflags);
++ drm_mm_remove_node(&vc4_state->upm[i]);
++ spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
++
++ if (vc4_state->upm_handle[i] > 0)
++ ida_free(&hvs->upm_handles, vc4_state->upm_handle[i]);
+ }
+
+ kfree(vc4_state->dlist);
+@@ -543,6 +565,11 @@ static void vc4_write_tpz(struct vc4_pla
+ recip = ~0 / scale;
+
+ vc4_dlist_write(vc4_state,
++ /*
++ * The BCM2712 is lacking BIT(31) compared to
++ * the previous generations, but we don't use
++ * it.
++ */
+ VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
+ VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
+ vc4_dlist_write(vc4_state,
+@@ -590,10 +617,15 @@ static void vc4_write_ppf(struct vc4_pla
+ vc4_dlist_write(vc4_state,
+ SCALER_PPF_AGC |
+ VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
++ /*
++ * The documented register layout for setting up
++ * the phase is slightly different on the BCM2712,
++ * but the two layouts seem equivalent.
++ */
+ VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
+ }
+
+-static u32 vc4_lbm_size(struct drm_plane_state *state)
++static u32 __vc4_lbm_size(struct drm_plane_state *state)
+ {
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
+@@ -641,6 +673,131 @@ static u32 vc4_lbm_size(struct drm_plane
+ return lbm;
+ }
+
++static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
++ unsigned int channel)
++{
++ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++
++ switch (vc4_state->y_scaling[channel]) {
++ case VC4_SCALING_PPF:
++ return 4;
++
++ case VC4_SCALING_TPZ:
++ return 2;
++
++ default:
++ return 0;
++ }
++}
++
++static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
++ unsigned int channel)
++{
++ const struct drm_format_info *info = state->fb->format;
++ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++
++ if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
++ return 0;
++
++ if (info->is_yuv)
++ return channel ? 2 : 1;
++
++ if (info->has_alpha)
++ return 4;
++
++ return 3;
++}
++
++static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
++ unsigned int channel)
++{
++ const struct drm_format_info *info = state->fb->format;
++ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++ unsigned int channels_scaled = 0;
++ unsigned int components, words, wpc;
++ unsigned int width, lines;
++ unsigned int i;
++
++ /* LBM is meant to use the smaller of source or dest width, but there
++ * is an issue with UV scaling that the size required for the second
++ * channel is based on the source width only.
++ */
++ if (info->hsub > 1 && channel == 1)
++ width = state->src_w >> 16;
++ else
++ width = min(state->src_w >> 16, state->crtc_w);
++ width = round_up(width / info->hsub, 4);
++
++ wpc = vc4_lbm_words_per_component(state, channel);
++ if (!wpc)
++ return 0;
++
++ components = vc4_lbm_components(state, channel);
++ if (!components)
++ return 0;
++
++ if (state->alpha != DRM_BLEND_ALPHA_OPAQUE)
++ components -= 1;
++
++ words = width * wpc * components;
++
++ lines = DIV_ROUND_UP(words, 128 / info->hsub);
++
++ for (i = 0; i < 2; i++)
++ if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE) /* NOTE(review): tests [channel] on both passes, never [i] -- confirm intended */
++ channels_scaled++;
++
++ if (channels_scaled == 1)
++ lines = lines / 2;
++
++ return lines;
++}
++
++static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
++{
++ const struct drm_format_info *info = state->fb->format;
++
++ if (info->hsub > 1)
++ return max(vc4_lbm_channel_size(state, 0),
++ vc4_lbm_channel_size(state, 1));
++ else
++ return vc4_lbm_channel_size(state, 0);
++}
++
++u32 vc4_lbm_size(struct drm_plane_state *state)
++{
++ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++ struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
++
++ /* LBM is not needed when there's no vertical scaling. */
++ if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
++ vc4_state->y_scaling[1] == VC4_SCALING_NONE)
++ return 0;
++
++ if (vc4->gen >= VC4_GEN_6)
++ return __vc6_lbm_size(state);
++ else
++ return __vc4_lbm_size(state);
++}
++
++static size_t vc6_upm_size(const struct drm_plane_state *state,
++ unsigned int plane)
++{
++ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++ unsigned int stride = state->fb->pitches[plane];
++
++ /*
++ * TODO: This only works for raster formats, and is sub-optimal
++ * for buffers with a stride aligned on 32 bytes.
++ */
++ unsigned int words_per_line = (stride + 62) / 32;
++ unsigned int fetch_region_size = words_per_line * 32;
++ unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
++ unsigned int buffer_size = fetch_region_size * buffer_lines;
++
++ return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
++}
++
+ static void vc4_write_scaling_parameters(struct drm_plane_state *state,
+ int channel)
+ {
+@@ -744,6 +901,10 @@ static int vc4_plane_allocate_lbm(struct
+ if (!lbm_size)
+ return 0;
+
++ /*
++ * NOTE: BCM2712 doesn't need to be aligned, since the size
++ * returned by vc4_lbm_size() is in words already.
++ */
+ if (vc4->gen == VC4_GEN_5)
+ lbm_size = ALIGN(lbm_size, 64);
+ else if (vc4->gen == VC4_GEN_4)
+@@ -781,6 +942,57 @@ static int vc4_plane_allocate_lbm(struct
+ return 0;
+ }
+
++static int vc6_plane_allocate_upm(struct drm_plane_state *state)
++{
++ const struct drm_format_info *info = state->fb->format;
++ struct drm_device *drm = state->plane->dev;
++ struct vc4_dev *vc4 = to_vc4_dev(drm);
++ struct vc4_hvs *hvs = vc4->hvs;
++ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++ unsigned int i;
++ int ret;
++
++ WARN_ON_ONCE(vc4->gen < VC4_GEN_6);
++
++ vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;
++
++ for (i = 0; i < info->num_planes; i++) {
++ unsigned long irqflags;
++ size_t upm_size;
++
++ upm_size = vc6_upm_size(state, i);
++ if (!upm_size)
++ return -EINVAL;
++
++ spin_lock_irqsave(&hvs->mm_lock, irqflags);
++ ret = drm_mm_insert_node_generic(&hvs->upm_mm,
++ &vc4_state->upm[i],
++ upm_size, HVS_UBM_WORD_SIZE,
++ 0, 0);
++ spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
++ if (ret) {
++ drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
++ return ret;
++ }
++
++ ret = ida_alloc_range(&hvs->upm_handles, 1, 32, GFP_KERNEL);
++ if (ret < 0)
++ return ret;
++
++ vc4_state->upm_handle[i] = ret;
++
++ vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
++ VC4_SET_FIELD(vc4_state->upm[i].start / HVS_UBM_WORD_SIZE,
++ SCALER6_PTR0_UPM_BASE) |
++ VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
++ SCALER6_PTR0_UPM_HANDLE) |
++ VC4_SET_FIELD(vc4_state->upm_buffer_lines,
++ SCALER6_PTR0_UPM_BUFF_SIZE);
++ }
++
++ return 0;
++}
++
+ /*
+ * The colorspace conversion matrices are held in 3 entries in the dlist.
+ * Create an array of them, with entries for each full and limited mode, and
+@@ -1355,6 +1567,413 @@ static int vc4_plane_mode_set(struct drm
+ return 0;
+ }
+
++static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state)
++{
++ struct drm_plane_state *state = &vc4_state->base;
++ u32 ret = 0;
++
++ if (vc4_state->is_yuv) {
++ enum drm_color_encoding color_encoding = state->color_encoding;
++ enum drm_color_range color_range = state->color_range;
++
++ ret |= SCALER6_CTL2_CSC_ENABLE;
++
++ /* CSC pre-loaded with:
++ * 0 = BT601 limited range
++ * 1 = BT709 limited range
++ * 2 = BT2020 limited range
++ * 3 = BT601 full range
++ * 4 = BT709 full range
++ * 5 = BT2020 full range
++ */
++ if (color_encoding > DRM_COLOR_YCBCR_BT2020)
++ color_encoding = DRM_COLOR_YCBCR_BT601;
++ if (color_range > DRM_COLOR_YCBCR_FULL_RANGE)
++ color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
++
++ ret |= VC4_SET_FIELD(color_encoding + (color_range * 3),
++ SCALER6_CTL2_BRCM_CFC_CONTROL);
++ }
++
++ return ret;
++}
++
++static int vc6_plane_mode_set(struct drm_plane *plane,
++ struct drm_plane_state *state)
++{
++ struct drm_device *drm = plane->dev;
++ struct vc4_dev *vc4 = to_vc4_dev(drm);
++ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++ struct drm_framebuffer *fb = state->fb;
++ const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
++ u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
++ int num_planes = fb->format->num_planes;
++ u32 h_subsample = fb->format->hsub;
++ u32 v_subsample = fb->format->vsub;
++ bool mix_plane_alpha;
++ bool covers_screen;
++ u32 scl0, scl1, pitch0;
++ u32 tiling, src_x, src_y;
++ u32 width, height;
++ u32 hvs_format = format->hvs;
++ u32 offsets[3] = { 0 };
++ unsigned int rotation;
++ int ret, i;
++
++ if (vc4_state->dlist_initialized)
++ return 0;
++
++ ret = vc4_plane_setup_clipping_and_scaling(state);
++ if (ret)
++ return ret;
++
++ width = vc4_state->src_w[0] >> 16;
++ height = vc4_state->src_h[0] >> 16;
++
++ /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
++ * and 4:4:4, scl1 should be set to scl0 so both channels of
++ * the scaler do the same thing. For YUV, the Y plane needs
++ * to be put in channel 1 and Cb/Cr in channel 0, so we swap
++ * the scl fields here.
++ */
++ if (num_planes == 1) {
++ scl0 = vc4_get_scl_field(state, 0);
++ scl1 = scl0;
++ } else {
++ scl0 = vc4_get_scl_field(state, 1);
++ scl1 = vc4_get_scl_field(state, 0);
++ }
++
++ rotation = drm_rotation_simplify(state->rotation,
++ DRM_MODE_ROTATE_0 |
++ DRM_MODE_REFLECT_X |
++ DRM_MODE_REFLECT_Y);
++
++ /* We must point to the last line when Y reflection is enabled. */
++ src_y = vc4_state->src_y >> 16;
++ if (rotation & DRM_MODE_REFLECT_Y)
++ src_y += height - 1;
++
++ src_x = vc4_state->src_x >> 16;
++
++ switch (base_format_mod) {
++ case DRM_FORMAT_MOD_LINEAR:
++ tiling = SCALER6_CTL0_ADDR_MODE_LINEAR;
++
++ /* Adjust the base pointer to the first pixel to be scanned
++ * out.
++ */
++ for (i = 0; i < num_planes; i++) {
++ offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i];
++ offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i];
++ }
++
++ break;
++
++ case DRM_FORMAT_MOD_BROADCOM_SAND128:
++ case DRM_FORMAT_MOD_BROADCOM_SAND256: {
++ uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
++ u32 components_per_word;
++ u32 starting_offset;
++ u32 fetch_count;
++
++ if (param > SCALER_TILE_HEIGHT_MASK) {
++ DRM_DEBUG_KMS("SAND height too large (%d)\n",
++ param);
++ return -EINVAL;
++ }
++
++ if (fb->format->format == DRM_FORMAT_P030) {
++ hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT;
++ tiling = SCALER6_CTL0_ADDR_MODE_128B;
++ } else {
++ hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE;
++
++ switch (base_format_mod) {
++ case DRM_FORMAT_MOD_BROADCOM_SAND128:
++ tiling = SCALER6_CTL0_ADDR_MODE_128B;
++ break;
++ case DRM_FORMAT_MOD_BROADCOM_SAND256:
++ tiling = SCALER6_CTL0_ADDR_MODE_256B;
++ break;
++ default:
++ return -EINVAL;
++ }
++ }
++
++ /* Adjust the base pointer to the first pixel to be scanned
++ * out.
++ *
++ * For P030, y_ptr [31:4] is the 128bit word for the start pixel
++ * y_ptr [3:0] is the pixel (0-11) contained within that 128bit
++ * word that should be taken as the first pixel.
++ * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the
++ * element within the 128bit word, eg for pixel 3 the value
++ * should be 6.
++ */
++ for (i = 0; i < num_planes; i++) {
++ u32 tile_w, tile, x_off, pix_per_tile;
++
++ if (fb->format->format == DRM_FORMAT_P030) {
++ /*
++ * Spec says: bits [31:4] of the given address
++ * should point to the 128-bit word containing
++ * the desired starting pixel, and bits[3:0]
++ * should be between 0 and 11, indicating which
++ * of the 12-pixels in that 128-bit word is the
++ * first pixel to be used
++ */
++ u32 remaining_pixels = src_x % 96;
++ u32 aligned = remaining_pixels / 12;
++ u32 last_bits = remaining_pixels % 12;
++
++ x_off = aligned * 16 + last_bits;
++ tile_w = 128;
++ pix_per_tile = 96;
++ } else {
++ switch (base_format_mod) {
++ case DRM_FORMAT_MOD_BROADCOM_SAND128:
++ tile_w = 128;
++ break;
++ case DRM_FORMAT_MOD_BROADCOM_SAND256:
++ tile_w = 256;
++ break;
++ default:
++ return -EINVAL;
++ }
++ pix_per_tile = tile_w / fb->format->cpp[0];
++ x_off = (src_x % pix_per_tile) /
++ (i ? h_subsample : 1) *
++ fb->format->cpp[i];
++ }
++
++ tile = src_x / pix_per_tile;
++
++ offsets[i] += param * tile_w * tile;
++ offsets[i] += src_y / (i ? v_subsample : 1) * tile_w;
++ offsets[i] += x_off & ~(i ? 1 : 0);
++ }
++
++ components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32;
++ starting_offset = src_x % components_per_word;
++ fetch_count = (width + starting_offset + components_per_word - 1) /
++ components_per_word;
++
++ pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) |
++ VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
++ break;
++ }
++
++ default:
++ DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
++ (long long)fb->modifier);
++ return -EINVAL;
++ }
++
++ /* fetch an extra pixel if we don't actually line up with the left edge. */
++ if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
++ width++;
++
++ /* same for the right side */
++ if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
++ vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
++ width++;
++
++ /* now for the top */
++ if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
++ height++;
++
++ /* and the bottom */
++ if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
++ vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
++ height++;
++
++ /* for YUV444 hardware wants double the width, otherwise it doesn't
++ * fetch full width of chroma
++ */
++ if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444)
++ width <<= 1;
++
++ /* Don't waste cycles mixing with plane alpha if the set alpha
++ * is opaque or there is no per-pixel alpha information.
++ * In any case we use the alpha property value as the fixed alpha.
++ */
++ mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
++ fb->format->has_alpha;
++
++ /* Control Word 0: Scaling Configuration & Element Validity */
++ vc4_dlist_write(vc4_state,
++ SCALER6_CTL0_VALID |
++ VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) |
++ VC4_SET_FIELD(0, SCALER6_CTL0_ALPHA_MASK) |
++ (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) |
++ VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) |
++ VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) |
++ VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) |
++ VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT));
++
++ /* Position Word 0: Image Position */
++ vc4_state->pos0_offset = vc4_state->dlist_count;
++ vc4_dlist_write(vc4_state,
++ VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) |
++ (rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) |
++ VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X));
++
++ /* Control Word 2: Alpha Value & CSC */
++ vc4_dlist_write(vc4_state,
++ vc6_plane_get_csc_mode(vc4_state) |
++ vc4_hvs5_get_alpha_blend_mode(state) |
++ (mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) |
++ VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA));
++
++ /* Position Word 1: Scaled Image Dimensions */
++ if (!vc4_state->is_unity)
++ vc4_dlist_write(vc4_state,
++ VC4_SET_FIELD(vc4_state->crtc_h - 1,
++ SCALER6_POS1_SCL_LINES) |
++ VC4_SET_FIELD(vc4_state->crtc_w - 1,
++ SCALER6_POS1_SCL_WIDTH));
++
++ /* Position Word 2: Source Image Size */
++ vc4_state->pos2_offset = vc4_state->dlist_count;
++ vc4_dlist_write(vc4_state,
++ VC4_SET_FIELD(height - 1,
++ SCALER6_POS2_SRC_LINES) |
++ VC4_SET_FIELD(width - 1,
++ SCALER6_POS2_SRC_WIDTH));
++
++ /* Position Word 3: Context */
++ vc4_dlist_write(vc4_state, 0xc0c0c0c0);
++
++ /*
++ * TODO: This only covers Raster Scan Order planes
++ */
++ for (i = 0; i < num_planes; i++) {
++ dma_addr_t paddr = drm_fb_dma_get_gem_addr(fb, state, i);
++
++ paddr += offsets[i];
++
++ /* Pointer Word 0 */
++ vc4_state->ptr0_offset[i] = vc4_state->dlist_count;
++ vc4_dlist_write(vc4_state,
++ (rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) |
++ /*
++ * The UPM buffer will be allocated in
++ * vc6_plane_allocate_upm().
++ */
++ VC4_SET_FIELD(upper_32_bits(paddr) & 0xf,
++ SCALER6_PTR0_UPPER_ADDR));
++
++ /* Pointer Word 1 */
++ vc4_dlist_write(vc4_state, lower_32_bits(paddr));
++
++ /* Pointer Word 2 */
++ if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
++ base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
++ vc4_dlist_write(vc4_state,
++ VC4_SET_FIELD(fb->pitches[i],
++ SCALER6_PTR2_PITCH));
++ } else {
++ vc4_dlist_write(vc4_state, pitch0);
++ }
++ }
++
++ /*
++ * Palette Word 0
++ * TODO: We're not using the palette mode
++ */
++
++ /*
++ * Trans Word 0
++ * TODO: It's only relevant if we set the trans_rgb bit in the
++ * control word 0, and we don't at the moment.
++ */
++
++ vc4_state->lbm_offset = 0;
++
++ if (!vc4_state->is_unity || fb->format->is_yuv) {
++ /*
++ * Reserve a slot for the LBM Base Address. The real value will
++ * be set when calling vc4_plane_allocate_lbm().
++ */
++ if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
++ vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
++ vc4_state->lbm_offset = vc4_state->dlist_count;
++ vc4_dlist_counter_increment(vc4_state);
++ }
++
++ if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
++ vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
++ vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
++ vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
++ if (num_planes > 1)
++ /*
++ * Emit Cb/Cr as channel 0 and Y as channel
++ * 1. This matches how we set up scl0/scl1
++ * above.
++ */
++ vc4_write_scaling_parameters(state, 1);
++
++ vc4_write_scaling_parameters(state, 0);
++ }
++
++ /*
++ * If any PPF setup was done, then all the kernel
++ * pointers get uploaded.
++ */
++ if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
++ vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
++ vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
++ vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
++ u32 kernel =
++ VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
++ SCALER_PPF_KERNEL_OFFSET);
++
++ /* HPPF plane 0 */
++ vc4_dlist_write(vc4_state, kernel);
++ /* VPPF plane 0 */
++ vc4_dlist_write(vc4_state, kernel);
++ /* HPPF plane 1 */
++ vc4_dlist_write(vc4_state, kernel);
++ /* VPPF plane 1 */
++ vc4_dlist_write(vc4_state, kernel);
++ }
++ }
++
++ vc4_dlist_write(vc4_state, SCALER6_CTL0_END);
++
++ vc4_state->dlist[0] |=
++ VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT);
++
++ /* crtc_* are already clipped coordinates. */
++ covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
++ vc4_state->crtc_w == state->crtc->mode.hdisplay &&
++ vc4_state->crtc_h == state->crtc->mode.vdisplay;
++
++ /*
++ * Background fill might be necessary when the plane has per-pixel
++ * alpha content or a non-opaque plane alpha and could blend from the
++ * background or does not cover the entire screen.
++ */
++ vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
++ state->alpha != DRM_BLEND_ALPHA_OPAQUE;
++
++ /*
++ * Flag the dlist as initialized to avoid checking it twice in case
++ * the async update check already called vc4_plane_mode_set() and
++ * decided to fallback to sync update because async update was not
++ * possible.
++ */
++ vc4_state->dlist_initialized = 1;
++
++ vc4_plane_calc_load(state);
++
++ drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n",
++ plane->base.id, plane->name, vc4_state->dlist_count);
++
++ return 0;
++}
++
+ /* If a modeset involves changing the setup of a plane, the atomic
+ * infrastructure will call this to validate a proposed plane setup.
+ * However, if a plane isn't getting updated, this (and the
+@@ -1365,6 +1984,7 @@ static int vc4_plane_mode_set(struct drm
+ static int vc4_plane_atomic_check(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+ {
++ struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
+ struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
+ plane);
+ struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
+@@ -1375,7 +1995,10 @@ static int vc4_plane_atomic_check(struct
+ if (!plane_enabled(new_plane_state))
+ return 0;
+
+- ret = vc4_plane_mode_set(plane, new_plane_state);
++ if (vc4->gen >= VC4_GEN_6)
++ ret = vc6_plane_mode_set(plane, new_plane_state);
++ else
++ ret = vc4_plane_mode_set(plane, new_plane_state);
+ if (ret)
+ return ret;
+
+@@ -1383,6 +2006,12 @@ static int vc4_plane_atomic_check(struct
+ if (ret)
+ return ret;
+
++ if (vc4->gen >= VC4_GEN_6) {
++ ret = vc6_plane_allocate_upm(new_plane_state);
++ if (ret)
++ return ret;
++ }
++
+ return 0;
+ }
+
+@@ -1716,7 +2345,7 @@ struct drm_plane *vc4_plane_init(struct
+ };
+
+ for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
+- if (!hvs_formats[i].hvs5_only || vc4->gen == VC4_GEN_5) {
++ if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) {
+ formats[num_formats] = hvs_formats[i].drm;
+ num_formats++;
+ }
+@@ -1731,7 +2360,7 @@ struct drm_plane *vc4_plane_init(struct
+ return ERR_CAST(vc4_plane);
+ plane = &vc4_plane->base;
+
+- if (vc4->gen == VC4_GEN_5)
++ if (vc4->gen >= VC4_GEN_5)
+ drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
+ else
+ drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
+--- a/drivers/gpu/drm/vc4/vc4_regs.h
++++ b/drivers/gpu/drm/vc4/vc4_regs.h
+@@ -536,6 +536,130 @@
+
+ #define SCALER5_DLIST_START 0x00004000
+
++#define SCALER6_VERSION 0x00000000
++#define SCALER6_CXM_SIZE 0x00000004
++#define SCALER6_LBM_SIZE 0x00000008
++#define SCALER6_UBM_SIZE 0x0000000c
++#define SCALER6_COBA_SIZE 0x00000010
++#define SCALER6_COB_SIZE 0x00000014
++
++#define SCALER6_CONTROL 0x00000020
++# define SCALER6_CONTROL_HVS_EN BIT(31)
++# define SCALER6_CONTROL_PF_LINES_MASK VC4_MASK(22, 18)
++# define SCALER6_CONTROL_ABORT_ON_EMPTY BIT(16)
++# define SCALER6_CONTROL_DSP1_TARGET_MASK VC4_MASK(13, 12)
++# define SCALER6_CONTROL_MAX_REQS_MASK VC4_MASK(7, 4)
++
++#define SCALER6_FETCHER_STATUS 0x00000024
++#define SCALER6_FETCH_STATUS 0x00000028
++#define SCALER6_HANDLE_ERROR 0x0000002c
++
++#define SCALER6_DISP0_CTRL0 0x00000030
++#define SCALER6_DISPX_CTRL0(x) \
++ (SCALER6_DISP0_CTRL0 + ((x) * (SCALER6_DISP1_CTRL0 - SCALER6_DISP0_CTRL0)))
++# define SCALER6_DISPX_CTRL0_ENB BIT(31)
++# define SCALER6_DISPX_CTRL0_RESET BIT(30)
++# define SCALER6_DISPX_CTRL0_FWIDTH_MASK VC4_MASK(28, 16)
++# define SCALER6_DISPX_CTRL0_ONESHOT BIT(15)
++# define SCALER6_DISPX_CTRL0_ONECTX_MASK VC4_MASK(14, 13)
++# define SCALER6_DISPX_CTRL0_LINES_MASK VC4_MASK(12, 0)
++
++#define SCALER6_DISP0_CTRL1 0x00000034
++#define SCALER6_DISPX_CTRL1(x) \
++ (SCALER6_DISP0_CTRL1 + ((x) * (SCALER6_DISP1_CTRL1 - SCALER6_DISP0_CTRL1)))
++# define SCALER6_DISPX_CTRL1_BGENB BIT(8)
++# define SCALER6_DISPX_CTRL1_INTLACE BIT(0)
++
++#define SCALER6_DISP0_BGND 0x00000038
++#define SCALER6_DISPX_BGND(x) \
++ (SCALER6_DISP0_BGND + ((x) * (SCALER6_DISP1_BGND - SCALER6_DISP0_BGND)))
++
++#define SCALER6_DISP0_LPTRS 0x0000003c
++#define SCALER6_DISPX_LPTRS(x) \
++ (SCALER6_DISP0_LPTRS + ((x) * (SCALER6_DISP1_LPTRS - SCALER6_DISP0_LPTRS)))
++# define SCALER6_DISPX_LPTRS_HEADE_MASK VC4_MASK(11, 0)
++
++#define SCALER6_DISP0_COB 0x00000040
++#define SCALER6_DISPX_COB(x) \
++ (SCALER6_DISP0_COB + ((x) * (SCALER6_DISP1_COB - SCALER6_DISP0_COB)))
++# define SCALER6_DISPX_COB_TOP_MASK VC4_MASK(31, 16)
++# define SCALER6_DISPX_COB_BASE_MASK VC4_MASK(15, 0)
++
++#define SCALER6_DISP0_STATUS 0x00000044
++
++#define SCALER6_DISPX_STATUS(x) \
++ (SCALER6_DISP0_STATUS + ((x) * (SCALER6_DISP1_STATUS - SCALER6_DISP0_STATUS)))
++# define SCALER6_DISPX_STATUS_EMPTY BIT(22)
++# define SCALER6_DISPX_STATUS_FRCNT_MASK VC4_MASK(21, 16)
++# define SCALER6_DISPX_STATUS_OFIELD BIT(15)
++# define SCALER6_DISPX_STATUS_MODE_MASK VC4_MASK(14, 13)
++# define SCALER6_DISPX_STATUS_MODE_DISABLED 0
++# define SCALER6_DISPX_STATUS_MODE_INIT 1
++# define SCALER6_DISPX_STATUS_MODE_RUN 2
++# define SCALER6_DISPX_STATUS_MODE_EOF 3
++# define SCALER6_DISPX_STATUS_YLINE_MASK VC4_MASK(12, 0)
++
++#define SCALER6_DISP0_DL 0x00000048
++
++#define SCALER6_DISPX_DL(x) \
++ (SCALER6_DISP0_DL + ((x) * (SCALER6_DISP1_DL - SCALER6_DISP0_DL)))
++# define SCALER6_DISPX_DL_LACT_MASK VC4_MASK(11, 0)
++
++#define SCALER6_DISP0_RUN 0x0000004c
++#define SCALER6_DISP1_CTRL0 0x00000050
++#define SCALER6_DISP1_CTRL1 0x00000054
++#define SCALER6_DISP1_BGND 0x00000058
++#define SCALER6_DISP1_LPTRS 0x0000005c
++#define SCALER6_DISP1_COB 0x00000060
++#define SCALER6_DISP1_STATUS 0x00000064
++#define SCALER6_DISP1_DL 0x00000068
++#define SCALER6_DISP1_RUN 0x0000006c
++#define SCALER6_DISP2_CTRL0 0x00000070
++#define SCALER6_DISP2_CTRL1 0x00000074
++#define SCALER6_DISP2_BGND 0x00000078
++#define SCALER6_DISP2_LPTRS 0x0000007c
++#define SCALER6_DISP2_COB 0x00000080
++#define SCALER6_DISP2_STATUS 0x00000084
++#define SCALER6_DISP2_DL 0x00000088
++#define SCALER6_DISP2_RUN 0x0000008c
++#define SCALER6_EOLN 0x00000090
++#define SCALER6_DL_STATUS 0x00000094
++#define SCALER6_BFG_MISC 0x0000009c
++#define SCALER6_QOS0 0x000000a0
++#define SCALER6_PROF0 0x000000a4
++#define SCALER6_QOS1 0x000000a8
++#define SCALER6_PROF1 0x000000ac
++#define SCALER6_QOS2 0x000000b0
++#define SCALER6_PROF2 0x000000b4
++#define SCALER6_PRI_MAP0 0x000000b8
++#define SCALER6_PRI_MAP1 0x000000bc
++#define SCALER6_HISTCTRL 0x000000c0
++#define SCALER6_HISTBIN0 0x000000c4
++#define SCALER6_HISTBIN1 0x000000c8
++#define SCALER6_HISTBIN2 0x000000cc
++#define SCALER6_HISTBIN3 0x000000d0
++#define SCALER6_HISTBIN4 0x000000d4
++#define SCALER6_HISTBIN5 0x000000d8
++#define SCALER6_HISTBIN6 0x000000dc
++#define SCALER6_HISTBIN7 0x000000e0
++#define SCALER6_HDR_CFG_REMAP 0x000000f4
++#define SCALER6_COL_SPACE 0x000000f8
++#define SCALER6_HVS_ID 0x000000fc
++#define SCALER6_CFC1 0x00000100
++#define SCALER6_DISP_UPM_ISO0 0x00000200
++#define SCALER6_DISP_UPM_ISO1 0x00000204
++#define SCALER6_DISP_UPM_ISO2 0x00000208
++#define SCALER6_DISP_LBM_ISO0 0x0000020c
++#define SCALER6_DISP_LBM_ISO1 0x00000210
++#define SCALER6_DISP_LBM_ISO2 0x00000214
++#define SCALER6_DISP_COB_ISO0 0x00000218
++#define SCALER6_DISP_COB_ISO1 0x0000021c
++#define SCALER6_DISP_COB_ISO2 0x00000220
++#define SCALER6_BAD_COB 0x00000224
++#define SCALER6_BAD_LBM 0x00000228
++#define SCALER6_BAD_UPM 0x0000022c
++#define SCALER6_BAD_AXI 0x00000230
++
+ # define VC4_HDMI_SW_RESET_FORMAT_DETECT BIT(1)
+ # define VC4_HDMI_SW_RESET_HDMI BIT(0)
+
+@@ -1131,4 +1255,61 @@ enum hvs_pixel_format {
+ #define SCALER_PITCH0_TILE_WIDTH_R_MASK VC4_MASK(6, 0)
+ #define SCALER_PITCH0_TILE_WIDTH_R_SHIFT 0
+
++#define SCALER6_CTL0_END BIT(31) /* CTL0 group: BIT() flags and VC4_MASK() fields; presumably all within one 32-bit display-list control word -- confirm against vc4_plane.c */
++#define SCALER6_CTL0_VALID BIT(30)
++#define SCALER6_CTL0_NEXT_MASK VC4_MASK(29, 24)
++#define SCALER6_CTL0_RGB_TRANS BIT(23)
++#define SCALER6_CTL0_ADDR_MODE_MASK VC4_MASK(22, 20)
++#define SCALER6_CTL0_ADDR_MODE_LINEAR 0 /* ADDR_MODE_* constants (0..4): by name, the values carried in the ADDR_MODE field */
++#define SCALER6_CTL0_ADDR_MODE_128B 1
++#define SCALER6_CTL0_ADDR_MODE_256B 2
++#define SCALER6_CTL0_ADDR_MODE_MAP8 3
++#define SCALER6_CTL0_ADDR_MODE_UIF 4
++
++#define SCALER6_CTL0_ALPHA_MASK_MASK VC4_MASK(19, 18) /* presumably the mask of a field that is itself named ALPHA_MASK -- confirm */
++#define SCALER6_CTL0_UNITY BIT(15)
++#define SCALER6_CTL0_ORDERRGBA_MASK VC4_MASK(14, 13)
++#define SCALER6_CTL0_SCL1_MODE_MASK VC4_MASK(10, 8) /* SCL1_MODE and SCL0_MODE are declared with equally sized (high - low) ranges */
++#define SCALER6_CTL0_SCL0_MODE_MASK VC4_MASK(7, 5)
++#define SCALER6_CTL0_PIXEL_FORMAT_MASK VC4_MASK(4, 0)
++
++#define SCALER6_POS0_START_Y_MASK VC4_MASK(28, 16) /* POS0 group: START_Y and START_X fields plus the HFLIP flag */
++#define SCALER6_POS0_HFLIP BIT(15) /* by name, a horizontal-flip flag -- confirm exact semantics in the plane code */
++#define SCALER6_POS0_START_X_MASK VC4_MASK(12, 0) /* declared with the same (high - low) span as START_Y */
++
++#define SCALER6_CTL2_ALPHA_MODE_MASK VC4_MASK(31, 30) /* CTL2 group: alpha-related fields/flags plus BFG, CSC_ENABLE and BRCM_CFC_CONTROL, going by the names */
++#define SCALER6_CTL2_ALPHA_PREMULT BIT(29)
++#define SCALER6_CTL2_ALPHA_MIX BIT(28)
++#define SCALER6_CTL2_BFG BIT(26)
++#define SCALER6_CTL2_CSC_ENABLE BIT(25)
++#define SCALER6_CTL2_BRCM_CFC_CONTROL_MASK VC4_MASK(18, 16)
++#define SCALER6_CTL2_ALPHA_MASK VC4_MASK(15, 4) /* NOTE(review): presumably "_MASK" is the field-mask suffix of a field named ALPHA here (unlike SCALER6_CTL0_ALPHA_MASK_MASK) -- confirm */
++
++#define SCALER6_POS1_SCL_LINES_MASK VC4_MASK(28, 16) /* POS1 group: SCL_LINES / SCL_WIDTH; presumably the scaled (output) height and width -- confirm */
++#define SCALER6_POS1_SCL_WIDTH_MASK VC4_MASK(12, 0)
++
++#define SCALER6_POS2_SRC_LINES_MASK VC4_MASK(28, 16) /* POS2 group: SRC_LINES / SRC_WIDTH; presumably the source height and width -- confirm */
++#define SCALER6_POS2_SRC_WIDTH_MASK VC4_MASK(12, 0)
++
++#define SCALER6_PTR0_VFLIP BIT(31) /* PTR0 group: VFLIP flag, three UPM_* fields and UPPER_ADDR */
++#define SCALER6_PTR0_UPM_BASE_MASK VC4_MASK(28, 16)
++#define SCALER6_PTR0_UPM_HANDLE_MASK VC4_MASK(14, 10)
++#define SCALER6_PTR0_UPM_BUFF_SIZE_MASK VC4_MASK(9, 8)
++#define SCALER6_PTR0_UPM_BUFF_SIZE_16_LINES 3 /* UPM_BUFF_SIZE_* constants: by name, value v stands for (2 << v) lines */
++#define SCALER6_PTR0_UPM_BUFF_SIZE_8_LINES 2
++#define SCALER6_PTR0_UPM_BUFF_SIZE_4_LINES 1
++#define SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES 0
++#define SCALER6_PTR0_UPPER_ADDR_MASK VC4_MASK(7, 0) /* presumably the upper bits of a wider buffer address -- confirm */
++
++#define SCALER6_PTR2_ALPHA_BPP_MASK VC4_MASK(31, 31) /* high == low: presumably a one-bit field; its two values follow */
++#define SCALER6_PTR2_ALPHA_BPP_1BPP 1
++#define SCALER6_PTR2_ALPHA_BPP_8BPP 0
++#define SCALER6_PTR2_ALPHA_ORDER_MASK VC4_MASK(30, 30) /* high == low: presumably a one-bit field; its two values follow */
++#define SCALER6_PTR2_ALPHA_ORDER_MSB_TO_LSB 1
++#define SCALER6_PTR2_ALPHA_ORDER_LSB_TO_MSB 0
++#define SCALER6_PTR2_ALPHA_OFFS_MASK VC4_MASK(29, 27)
++#define SCALER6_PTR2_LSKIP_MASK VC4_MASK(26, 24)
++#define SCALER6_PTR2_PITCH_MASK VC4_MASK(16, 0)
++#define SCALER6_PTR2_FETCH_COUNT_MASK VC4_MASK(26, 16) /* NOTE(review): if VC4_MASK(hi, lo) spans bits hi..lo, this overlaps PITCH (16, 0) and LSKIP (26, 24); presumably alternative layouts of the same word -- confirm which users set which */
++
+ #endif /* VC4_REGS_H */