diff options
Diffstat (limited to 'target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch')
-rw-r--r-- | target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch | 2139 |
1 files changed, 2139 insertions, 0 deletions
diff --git a/target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch b/target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch new file mode 100644 index 0000000000..9659432294 --- /dev/null +++ b/target/linux/bcm27xx/patches-6.1/950-0963-drm-vc4-hvs-Support-BCM2712-HVS.patch @@ -0,0 +1,2139 @@ +From e84da235223d0209165183c430692dde5c69854c Mon Sep 17 00:00:00 2001 +From: Maxime Ripard <maxime@cerno.tech> +Date: Fri, 17 Feb 2023 15:25:16 +0100 +Subject: [PATCH] drm/vc4: hvs: Support BCM2712 HVS + +The HVS found in the BCM2712, while having a similar role, is very +different from the one found in the previous SoCs. Indeed, the register +layout is fairly different, and the DLIST format is new as well. + +Let's introduce the needed functions to support the new HVS. + +Signed-off-by: Maxime Ripard <maxime@cerno.tech> +--- + drivers/gpu/drm/vc4/vc4_crtc.c | 47 ++- + drivers/gpu/drm/vc4/vc4_drv.c | 8 +- + drivers/gpu/drm/vc4/vc4_drv.h | 18 + + drivers/gpu/drm/vc4/vc4_hvs.c | 626 ++++++++++++++++++++++++++++--- + drivers/gpu/drm/vc4/vc4_kms.c | 102 ++++- + drivers/gpu/drm/vc4/vc4_plane.c | 641 +++++++++++++++++++++++++++++++- + drivers/gpu/drm/vc4/vc4_regs.h | 181 +++++++++ + 7 files changed, 1540 insertions(+), 83 deletions(-) + +--- a/drivers/gpu/drm/vc4/vc4_crtc.c ++++ b/drivers/gpu/drm/vc4/vc4_crtc.c +@@ -82,13 +82,22 @@ static unsigned int + vc4_crtc_get_cob_allocation(struct vc4_dev *vc4, unsigned int channel) + { + struct vc4_hvs *hvs = vc4->hvs; +- u32 dispbase = HVS_READ(SCALER_DISPBASEX(channel)); ++ u32 dispbase, top, base; ++ + /* Top/base are supposed to be 4-pixel aligned, but the + * Raspberry Pi firmware fills the low bits (which are + * presumably ignored). 
+ */ +- u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3; +- u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3; ++ ++ if (vc4->gen >= VC4_GEN_6) { ++ dispbase = HVS_READ(SCALER6_DISPX_COB(channel)); ++ top = VC4_GET_FIELD(dispbase, SCALER6_DISPX_COB_TOP) & ~3; ++ base = VC4_GET_FIELD(dispbase, SCALER6_DISPX_COB_BASE) & ~3; ++ } else { ++ dispbase = HVS_READ(SCALER_DISPBASEX(channel)); ++ top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3; ++ base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3; ++ } + + return top - base + 4; + } +@@ -121,7 +130,10 @@ static bool vc4_crtc_get_scanout_positio + * Read vertical scanline which is currently composed for our + * pixelvalve by the HVS, and also the scaler status. + */ +- val = HVS_READ(SCALER_DISPSTATX(channel)); ++ if (vc4->gen >= VC4_GEN_6) ++ val = HVS_READ(SCALER6_DISPX_STATUS(channel)); ++ else ++ val = HVS_READ(SCALER_DISPSTATX(channel)); + + /* Get optional system timestamp after query. */ + if (etime) +@@ -130,7 +142,12 @@ static bool vc4_crtc_get_scanout_positio + /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ + + /* Vertical position of hvs composed scanline. 
*/ +- *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE); ++ ++ if (vc4->gen >= VC4_GEN_6) ++ *vpos = VC4_GET_FIELD(val, SCALER6_DISPX_STATUS_YLINE); ++ else ++ *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE); ++ + *hpos = 0; + + if (mode->flags & DRM_MODE_FLAG_INTERLACE) { +@@ -475,8 +492,10 @@ static void require_hvs_enabled(struct d + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hvs *hvs = vc4->hvs; + +- WARN_ON_ONCE((HVS_READ(SCALER_DISPCTRL) & SCALER_DISPCTRL_ENABLE) != +- SCALER_DISPCTRL_ENABLE); ++ if (vc4->gen >= VC4_GEN_6) ++ WARN_ON_ONCE(!(HVS_READ(SCALER6_CONTROL) & SCALER6_CONTROL_HVS_EN)); ++ else ++ WARN_ON_ONCE(!(HVS_READ(SCALER_DISPCTRL) & SCALER_DISPCTRL_ENABLE)); + } + + static int vc4_crtc_disable(struct drm_crtc *crtc, +@@ -804,14 +823,21 @@ static void vc4_crtc_handle_page_flip(st + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hvs *hvs = vc4->hvs; ++ unsigned int current_dlist; + u32 chan = vc4_crtc->current_hvs_channel; + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + spin_lock(&vc4_crtc->irq_lock); ++ ++ if (vc4->gen >= VC4_GEN_6) ++ current_dlist = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_DL(chan)), ++ SCALER6_DISPX_DL_LACT); ++ else ++ current_dlist = HVS_READ(SCALER_DISPLACTX(chan)); ++ + if (vc4_crtc->event && +- (vc4_crtc->current_dlist == HVS_READ(SCALER_DISPLACTX(chan)) || +- vc4_crtc->feeds_txp)) { ++ (vc4_crtc->current_dlist == current_dlist || vc4_crtc->feeds_txp)) { + drm_crtc_send_vblank_event(crtc, vc4_crtc->event); + vc4_crtc->event = NULL; + drm_crtc_vblank_put(crtc); +@@ -822,7 +848,8 @@ static void vc4_crtc_handle_page_flip(st + * the CRTC and encoder already reconfigured, leading to + * underruns. This can be seen when reconfiguring the CRTC. 
+ */ +- vc4_hvs_unmask_underrun(hvs, chan); ++ if (vc4->gen < VC4_GEN_6) ++ vc4_hvs_unmask_underrun(hvs, chan); + } + spin_unlock(&vc4_crtc->irq_lock); + spin_unlock_irqrestore(&dev->event_lock, flags); +--- a/drivers/gpu/drm/vc4/vc4_drv.c ++++ b/drivers/gpu/drm/vc4/vc4_drv.c +@@ -277,6 +277,7 @@ static const struct of_device_id vc4_dma + { .compatible = "brcm,bcm2711-hvs" }, + { .compatible = "brcm,bcm2835-hvs" }, + { .compatible = "brcm,bcm2711-hvs" }, ++ { .compatible = "brcm,bcm2712-hvs" }, + { .compatible = "raspberrypi,rpi-firmware-kms" }, + { .compatible = "brcm,bcm2835-v3d" }, + { .compatible = "brcm,cygnus-v3d" }, +@@ -308,8 +309,6 @@ static int vc4_drm_bind(struct device *d + enum vc4_gen gen; + int ret = 0; + +- dev->coherent_dma_mask = DMA_BIT_MASK(32); +- + if (of_device_is_compatible(dev->of_node, "brcm,bcm2712-vc6")) + gen = VC4_GEN_6; + else if (of_device_is_compatible(dev->of_node, "brcm,bcm2711-vc5")) +@@ -322,6 +321,11 @@ static int vc4_drm_bind(struct device *d + else + driver = &vc4_drm_driver; + ++ if (gen >= VC4_GEN_6) ++ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36)); ++ else ++ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); ++ + node = of_find_matching_node_and_match(NULL, vc4_dma_range_matches, + NULL); + if (node) { +--- a/drivers/gpu/drm/vc4/vc4_drv.h ++++ b/drivers/gpu/drm/vc4/vc4_drv.h +@@ -345,8 +345,10 @@ struct vc4_hvs { + unsigned int dlist_mem_size; + + struct clk *core_clk; ++ struct clk *disp_clk; + + struct { ++ unsigned int desc; + unsigned int enabled: 1; + } eof_irq[HVS_NUM_CHANNELS]; + +@@ -358,6 +360,11 @@ struct vc4_hvs { + struct drm_mm dlist_mm; + /* Memory manager for the LBM memory used by HVS scaling. */ + struct drm_mm lbm_mm; ++ ++ /* Memory manager for the UPM memory used for prefetching. 
*/ ++ struct drm_mm upm_mm; ++ struct ida upm_handles; ++ + spinlock_t mm_lock; + + struct list_head stale_dlist_entries; +@@ -382,6 +389,8 @@ struct vc4_hvs { + bool vc5_hdmi_enable_4096by2160; + }; + ++#define HVS_UBM_WORD_SIZE 256 ++ + struct vc4_hvs_state { + struct drm_private_state base; + unsigned long core_clock_rate; +@@ -456,6 +465,15 @@ struct vc4_plane_state { + /* Our allocation in LBM for temporary storage during scaling. */ + struct drm_mm_node lbm; + ++ /* Our allocation in UPM for prefetching. */ ++ struct drm_mm_node upm[DRM_FORMAT_MAX_PLANES]; ++ ++ /* The Unified Pre-Fetcher Handle */ ++ unsigned int upm_handle[DRM_FORMAT_MAX_PLANES]; ++ ++ /* Number of lines to pre-fetch */ ++ unsigned int upm_buffer_lines; ++ + /* Set when the plane has per-pixel alpha content or does not cover + * the entire screen. This is a hint to the CRTC that it might need + * to enable background color fill. +--- a/drivers/gpu/drm/vc4/vc4_hvs.c ++++ b/drivers/gpu/drm/vc4/vc4_hvs.c +@@ -67,6 +67,80 @@ static const struct debugfs_reg32 vc4_hv + VC4_REG32(SCALER_OLEDCOEF2), + }; + ++static const struct debugfs_reg32 vc6_hvs_regs[] = { ++ VC4_REG32(SCALER6_VERSION), ++ VC4_REG32(SCALER6_CXM_SIZE), ++ VC4_REG32(SCALER6_LBM_SIZE), ++ VC4_REG32(SCALER6_UBM_SIZE), ++ VC4_REG32(SCALER6_COBA_SIZE), ++ VC4_REG32(SCALER6_COB_SIZE), ++ VC4_REG32(SCALER6_CONTROL), ++ VC4_REG32(SCALER6_FETCHER_STATUS), ++ VC4_REG32(SCALER6_FETCH_STATUS), ++ VC4_REG32(SCALER6_HANDLE_ERROR), ++ VC4_REG32(SCALER6_DISP0_CTRL0), ++ VC4_REG32(SCALER6_DISP0_CTRL1), ++ VC4_REG32(SCALER6_DISP0_BGND), ++ VC4_REG32(SCALER6_DISP0_LPTRS), ++ VC4_REG32(SCALER6_DISP0_COB), ++ VC4_REG32(SCALER6_DISP0_STATUS), ++ VC4_REG32(SCALER6_DISP0_DL), ++ VC4_REG32(SCALER6_DISP0_RUN), ++ VC4_REG32(SCALER6_DISP1_CTRL0), ++ VC4_REG32(SCALER6_DISP1_CTRL1), ++ VC4_REG32(SCALER6_DISP1_BGND), ++ VC4_REG32(SCALER6_DISP1_LPTRS), ++ VC4_REG32(SCALER6_DISP1_COB), ++ VC4_REG32(SCALER6_DISP1_STATUS), ++ VC4_REG32(SCALER6_DISP1_DL), ++ 
VC4_REG32(SCALER6_DISP1_RUN), ++ VC4_REG32(SCALER6_DISP2_CTRL0), ++ VC4_REG32(SCALER6_DISP2_CTRL1), ++ VC4_REG32(SCALER6_DISP2_BGND), ++ VC4_REG32(SCALER6_DISP2_LPTRS), ++ VC4_REG32(SCALER6_DISP2_COB), ++ VC4_REG32(SCALER6_DISP2_STATUS), ++ VC4_REG32(SCALER6_DISP2_DL), ++ VC4_REG32(SCALER6_DISP2_RUN), ++ VC4_REG32(SCALER6_EOLN), ++ VC4_REG32(SCALER6_DL_STATUS), ++ VC4_REG32(SCALER6_BFG_MISC), ++ VC4_REG32(SCALER6_QOS0), ++ VC4_REG32(SCALER6_PROF0), ++ VC4_REG32(SCALER6_QOS1), ++ VC4_REG32(SCALER6_PROF1), ++ VC4_REG32(SCALER6_QOS2), ++ VC4_REG32(SCALER6_PROF2), ++ VC4_REG32(SCALER6_PRI_MAP0), ++ VC4_REG32(SCALER6_PRI_MAP1), ++ VC4_REG32(SCALER6_HISTCTRL), ++ VC4_REG32(SCALER6_HISTBIN0), ++ VC4_REG32(SCALER6_HISTBIN1), ++ VC4_REG32(SCALER6_HISTBIN2), ++ VC4_REG32(SCALER6_HISTBIN3), ++ VC4_REG32(SCALER6_HISTBIN4), ++ VC4_REG32(SCALER6_HISTBIN5), ++ VC4_REG32(SCALER6_HISTBIN6), ++ VC4_REG32(SCALER6_HISTBIN7), ++ VC4_REG32(SCALER6_HDR_CFG_REMAP), ++ VC4_REG32(SCALER6_COL_SPACE), ++ VC4_REG32(SCALER6_HVS_ID), ++ VC4_REG32(SCALER6_CFC1), ++ VC4_REG32(SCALER6_DISP_UPM_ISO0), ++ VC4_REG32(SCALER6_DISP_UPM_ISO1), ++ VC4_REG32(SCALER6_DISP_UPM_ISO2), ++ VC4_REG32(SCALER6_DISP_LBM_ISO0), ++ VC4_REG32(SCALER6_DISP_LBM_ISO1), ++ VC4_REG32(SCALER6_DISP_LBM_ISO2), ++ VC4_REG32(SCALER6_DISP_COB_ISO0), ++ VC4_REG32(SCALER6_DISP_COB_ISO1), ++ VC4_REG32(SCALER6_DISP_COB_ISO2), ++ VC4_REG32(SCALER6_BAD_COB), ++ VC4_REG32(SCALER6_BAD_LBM), ++ VC4_REG32(SCALER6_BAD_UPM), ++ VC4_REG32(SCALER6_BAD_AXI), ++}; ++ + void vc4_hvs_dump_state(struct vc4_hvs *hvs) + { + struct drm_device *drm = &hvs->vc4->base; +@@ -145,6 +219,55 @@ static int vc4_hvs_debugfs_dlist(struct + return 0; + } + ++static int vc6_hvs_debugfs_dlist(struct seq_file *m, void *data) ++{ ++ struct drm_info_node *node = m->private; ++ struct drm_device *dev = node->minor->dev; ++ struct vc4_dev *vc4 = to_vc4_dev(dev); ++ struct vc4_hvs *hvs = vc4->hvs; ++ struct drm_printer p = drm_seq_file_printer(m); ++ unsigned int 
dlist_mem_size = hvs->dlist_mem_size; ++ unsigned int next_entry_start; ++ unsigned int i; ++ ++ for (i = 0; i < SCALER_CHANNELS_COUNT; i++) { ++ unsigned int active_dlist, dispstat; ++ unsigned int j; ++ ++ dispstat = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(i)), ++ SCALER6_DISPX_STATUS_MODE); ++ if (dispstat == SCALER6_DISPX_STATUS_MODE_DISABLED || ++ dispstat == SCALER6_DISPX_STATUS_MODE_EOF) { ++ drm_printf(&p, "HVS chan %u disabled\n", i); ++ continue; ++ } ++ ++ drm_printf(&p, "HVS chan %u:\n", i); ++ ++ active_dlist = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_DL(i)), ++ SCALER6_DISPX_DL_LACT); ++ next_entry_start = 0; ++ ++ for (j = active_dlist; j < dlist_mem_size; j++) { ++ u32 dlist_word; ++ ++ dlist_word = readl((u32 __iomem *)vc4->hvs->dlist + j); ++ drm_printf(&p, "dlist: %02d: 0x%08x\n", j, ++ dlist_word); ++ if (!next_entry_start || ++ next_entry_start == j) { ++ if (dlist_word & SCALER_CTL0_END) ++ break; ++ next_entry_start = j + ++ VC4_GET_FIELD(dlist_word, ++ SCALER_CTL0_SIZE); ++ } ++ } ++ } ++ ++ return 0; ++} ++ + static int vc5_hvs_debugfs_gamma(struct seq_file *m, void *data) + { + struct drm_info_node *node = m->private; +@@ -435,6 +558,10 @@ static void vc4_hvs_irq_enable_eof(struc + SCALER5_DISPCTRL_DSPEIEOF(channel)); + break; + ++ case VC4_GEN_6: ++ enable_irq(hvs->eof_irq[channel].desc); ++ break; ++ + default: + break; + } +@@ -463,6 +590,10 @@ static void vc4_hvs_irq_clear_eof(struct + ~SCALER5_DISPCTRL_DSPEIEOF(channel)); + break; + ++ case VC4_GEN_6: ++ disable_irq_nosync(hvs->eof_irq[channel].desc); ++ break; ++ + default: + break; + } +@@ -622,26 +753,32 @@ static void vc4_hvs_dlist_free_work(stru + + u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo) + { +- struct drm_device *drm = &hvs->vc4->base; ++ struct vc4_dev *vc4 = hvs->vc4; ++ struct drm_device *drm = &vc4->base; + u8 field = 0; + int idx; + + if (!drm_dev_enter(drm, &idx)) + return 0; + +- switch (fifo) { +- case 0: +- field = 
VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), +- SCALER_DISPSTAT1_FRCNT0); +- break; +- case 1: +- field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), +- SCALER_DISPSTAT1_FRCNT1); +- break; +- case 2: +- field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2), +- SCALER_DISPSTAT2_FRCNT2); +- break; ++ if (vc4->gen >= VC4_GEN_6) { ++ field = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(fifo)), ++ SCALER6_DISPX_STATUS_FRCNT); ++ } else { ++ switch (fifo) { ++ case 0: ++ field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), ++ SCALER_DISPSTAT1_FRCNT0); ++ break; ++ case 1: ++ field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), ++ SCALER_DISPSTAT1_FRCNT1); ++ break; ++ case 2: ++ field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2), ++ SCALER_DISPSTAT2_FRCNT2); ++ break; ++ } + } + + drm_dev_exit(idx); +@@ -708,6 +845,23 @@ int vc4_hvs_get_fifo_from_output(struct + default: + return -EPIPE; + } ++ ++ case VC4_GEN_6: ++ switch (output) { ++ case 0: ++ return 0; ++ ++ case 2: ++ return 2; ++ ++ case 1: ++ case 3: ++ case 4: ++ return 1; ++ ++ default: ++ return -EPIPE; ++ } + } + + return -EPIPE; +@@ -782,7 +936,41 @@ static int vc4_hvs_init_channel(struct v + return 0; + } + +-void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan) ++static int vc6_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, ++ struct drm_display_mode *mode, bool oneshot) ++{ ++ struct vc4_dev *vc4 = hvs->vc4; ++ struct drm_device *drm = &vc4->base; ++ struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state); ++ unsigned int chan = vc4_crtc_state->assigned_channel; ++ bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE; ++ u32 disp_ctrl1; ++ int idx; ++ ++ if (!drm_dev_enter(drm, &idx)) ++ return -ENODEV; ++ ++ HVS_WRITE(SCALER6_DISPX_CTRL0(chan), SCALER6_DISPX_CTRL0_RESET); ++ ++ disp_ctrl1 = HVS_READ(SCALER6_DISPX_CTRL1(chan)); ++ disp_ctrl1 &= ~SCALER6_DISPX_CTRL1_INTLACE; ++ HVS_WRITE(SCALER6_DISPX_CTRL1(chan), ++ disp_ctrl1 | (interlace ? 
SCALER6_DISPX_CTRL1_INTLACE : 0)); ++ ++ HVS_WRITE(SCALER6_DISPX_CTRL0(chan), ++ SCALER6_DISPX_CTRL0_ENB | ++ VC4_SET_FIELD(mode->hdisplay - 1, ++ SCALER6_DISPX_CTRL0_FWIDTH) | ++ (oneshot ? SCALER6_DISPX_CTRL0_ONESHOT : 0) | ++ VC4_SET_FIELD(mode->vdisplay - 1, ++ SCALER6_DISPX_CTRL0_LINES)); ++ ++ drm_dev_exit(idx); ++ ++ return 0; ++} ++ ++static void __vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan) + { + struct drm_device *drm = &hvs->vc4->base; + int idx; +@@ -813,6 +1001,42 @@ out: + drm_dev_exit(idx); + } + ++static void __vc6_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan) ++{ ++ struct vc4_dev *vc4 = hvs->vc4; ++ struct drm_device *drm = &vc4->base; ++ int idx; ++ ++ if (!drm_dev_enter(drm, &idx)) ++ return; ++ ++ if (!(HVS_READ(SCALER6_DISPX_CTRL0(chan)) & SCALER6_DISPX_CTRL0_ENB)) ++ goto out; ++ ++ HVS_WRITE(SCALER6_DISPX_CTRL0(chan), ++ HVS_READ(SCALER6_DISPX_CTRL0(chan)) | SCALER6_DISPX_CTRL0_RESET); ++ ++ HVS_WRITE(SCALER6_DISPX_CTRL0(chan), ++ HVS_READ(SCALER6_DISPX_CTRL0(chan)) & ~SCALER6_DISPX_CTRL0_ENB); ++ ++ WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(chan)), ++ SCALER6_DISPX_STATUS_MODE) != ++ SCALER6_DISPX_STATUS_MODE_DISABLED); ++ ++out: ++ drm_dev_exit(idx); ++} ++ ++void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan) ++{ ++ struct vc4_dev *vc4 = hvs->vc4; ++ ++ if (vc4->gen >= VC4_GEN_6) ++ __vc6_hvs_stop_channel(hvs, chan); ++ else ++ __vc4_hvs_stop_channel(hvs, chan); ++} ++ + static int vc4_hvs_gamma_check(struct drm_crtc *crtc, + struct drm_atomic_state *state) + { +@@ -907,8 +1131,14 @@ static void vc4_hvs_install_dlist(struct + return; + + WARN_ON(!vc4_state->mm); +- HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel), +- vc4_state->mm->mm_node.start); ++ ++ if (vc4->gen >= VC4_GEN_6) ++ HVS_WRITE(SCALER6_DISPX_LPTRS(vc4_state->assigned_channel), ++ VC4_SET_FIELD(vc4_state->mm->mm_node.start, ++ SCALER6_DISPX_LPTRS_HEADE)); ++ else ++ 
HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel), ++ vc4_state->mm->mm_node.start); + + drm_dev_exit(idx); + } +@@ -965,7 +1195,11 @@ void vc4_hvs_atomic_enable(struct drm_cr + + vc4_hvs_install_dlist(crtc); + vc4_hvs_update_dlist(crtc); +- vc4_hvs_init_channel(vc4->hvs, crtc, mode, oneshot); ++ ++ if (vc4->gen >= VC4_GEN_6) ++ vc6_hvs_init_channel(vc4->hvs, crtc, mode, oneshot); ++ else ++ vc4_hvs_init_channel(vc4->hvs, crtc, mode, oneshot); + } + + void vc4_hvs_atomic_disable(struct drm_crtc *crtc, +@@ -1052,13 +1286,28 @@ void vc4_hvs_atomic_flush(struct drm_crt + WARN_ON(!vc4_state->mm); + WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm->mm_node.size); + +- if (enable_bg_fill) ++ if (enable_bg_fill) { + /* This sets a black background color fill, as is the case + * with other DRM drivers. + */ +- HVS_WRITE(SCALER_DISPBKGNDX(channel), +- HVS_READ(SCALER_DISPBKGNDX(channel)) | +- SCALER_DISPBKGND_FILL); ++ if (vc4->gen >= VC4_GEN_6) ++ HVS_WRITE(SCALER6_DISPX_CTRL1(channel), ++ HVS_READ(SCALER6_DISPX_CTRL1(channel)) | ++ SCALER6_DISPX_CTRL1_BGENB); ++ else ++ HVS_WRITE(SCALER_DISPBKGNDX(channel), ++ HVS_READ(SCALER_DISPBKGNDX(channel)) | ++ SCALER_DISPBKGND_FILL); ++ } else { ++ if (vc4->gen >= VC4_GEN_6) ++ HVS_WRITE(SCALER6_DISPX_CTRL1(channel), ++ HVS_READ(SCALER6_DISPX_CTRL1(channel)) & ++ ~SCALER6_DISPX_CTRL1_BGENB); ++ else ++ HVS_WRITE(SCALER_DISPBKGNDX(channel), ++ HVS_READ(SCALER_DISPBKGNDX(channel)) & ++ ~SCALER_DISPBKGND_FILL); ++ } + + /* Only update DISPLIST if the CRTC was already running and is not + * being disabled. 
+@@ -1210,6 +1459,27 @@ static irqreturn_t vc4_hvs_irq_handler(i + return irqret; + } + ++static irqreturn_t vc6_hvs_eof_irq_handler(int irq, void *data) ++{ ++ struct drm_device *dev = data; ++ struct vc4_dev *vc4 = to_vc4_dev(dev); ++ struct vc4_hvs *hvs = vc4->hvs; ++ unsigned int i; ++ ++ for (i = 0; i < HVS_NUM_CHANNELS; i++) { ++ if (!hvs->eof_irq[i].enabled) ++ continue; ++ ++ if (hvs->eof_irq[i].desc != irq) ++ continue; ++ ++ vc4_hvs_schedule_dlist_sweep(hvs, i); ++ return IRQ_HANDLED; ++ } ++ ++ return IRQ_NONE; ++} ++ + int vc4_hvs_debugfs_init(struct drm_minor *minor) + { + struct drm_device *drm = minor->dev; +@@ -1232,8 +1502,10 @@ int vc4_hvs_debugfs_init(struct drm_mino + NULL); + } + +- ret = vc4_debugfs_add_file(minor, "hvs_dlists", +- vc4_hvs_debugfs_dlist, NULL); ++ if (vc4->gen >= VC4_GEN_6) ++ ret = vc4_debugfs_add_file(minor, "hvs_dlists", vc6_hvs_debugfs_dlist, NULL); ++ else ++ ret = vc4_debugfs_add_file(minor, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL); + if (ret) + return ret; + +@@ -1256,6 +1528,9 @@ struct vc4_hvs *__vc4_hvs_alloc(struct v + { + struct drm_device *drm = &vc4->base; + struct vc4_hvs *hvs; ++ unsigned int dlist_start; ++ size_t dlist_size; ++ size_t lbm_size; + + hvs = drmm_kzalloc(drm, sizeof(*hvs), GFP_KERNEL); + if (!hvs) +@@ -1270,14 +1545,39 @@ struct vc4_hvs *__vc4_hvs_alloc(struct v + INIT_LIST_HEAD(&hvs->stale_dlist_entries); + INIT_WORK(&hvs->free_dlist_work, vc4_hvs_dlist_free_work); + +- /* Set up the HVS display list memory manager. We never +- * overwrite the setup from the bootloader (just 128b out of +- * our 16K), since we don't want to scramble the screen when +- * transitioning from the firmware's boot setup to runtime. +- */ +- drm_mm_init(&hvs->dlist_mm, +- HVS_BOOTLOADER_DLIST_END, +- (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END); ++ switch (vc4->gen) { ++ case VC4_GEN_4: ++ case VC4_GEN_5: ++ /* Set up the HVS display list memory manager. 
We never ++ * overwrite the setup from the bootloader (just 128b ++ * out of our 16K), since we don't want to scramble the ++ * screen when transitioning from the firmware's boot ++ * setup to runtime. ++ */ ++ dlist_start = HVS_BOOTLOADER_DLIST_END; ++ dlist_size = (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END; ++ break; ++ ++ case VC4_GEN_6: ++ dlist_start = HVS_BOOTLOADER_DLIST_END; ++ ++ /* ++ * If we are running a test, it means that we can't ++ * access a register. Use a plausible size then. ++ */ ++ if (!kunit_get_current_test()) ++ dlist_size = HVS_READ(SCALER6_CXM_SIZE); ++ else ++ dlist_size = 4096; ++ ++ break; ++ ++ default: ++ drm_err(drm, "Unknown VC4 generation: %d", vc4->gen); ++ return ERR_PTR(-ENODEV); ++ } ++ ++ drm_mm_init(&hvs->dlist_mm, dlist_start, dlist_size); + + hvs->dlist_mem_size = dlist_size; + +@@ -1286,12 +1586,46 @@ struct vc4_hvs *__vc4_hvs_alloc(struct v + * between planes when they don't overlap on the screen, but + * for now we just allocate globally. + */ +- if (vc4->gen == VC4_GEN_4) ++ ++ switch (vc4->gen) { ++ case VC4_GEN_4: + /* 48k words of 2x12-bit pixels */ +- drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024); +- else ++ lbm_size = 48 * SZ_1K; ++ break; ++ ++ case VC4_GEN_5: + /* 60k words of 4x12-bit pixels */ +- drm_mm_init(&hvs->lbm_mm, 0, 60 * 1024); ++ lbm_size = 60 * SZ_1K; ++ break; ++ ++ case VC4_GEN_6: ++ /* ++ * If we are running a test, it means that we can't ++ * access a register. Use a plausible size then. ++ */ ++ lbm_size = 1024; ++ break; ++ ++ default: ++ drm_err(drm, "Unknown VC4 generation: %d", vc4->gen); ++ return ERR_PTR(-ENODEV); ++ } ++ ++ drm_mm_init(&hvs->lbm_mm, 0, lbm_size); ++ ++ if (vc4->gen >= VC4_GEN_6) { ++ ida_init(&hvs->upm_handles); ++ ++ /* ++ * NOTE: On BCM2712, the size can also be read through ++ * the SCALER_UBM_SIZE register. We would need to do a ++ * register access though, which we can't do with kunit ++ * that also uses this function to create its mock ++ * device. 
++ */ ++ drm_mm_init(&hvs->upm_mm, 0, 1024 * HVS_UBM_WORD_SIZE); ++ } ++ + + vc4->hvs = hvs; + +@@ -1388,10 +1722,124 @@ static int vc4_hvs_hw_init(struct vc4_hv + return 0; + } + ++#define CFC1_N_NL_CSC_CTRL(x) (0xa000 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C00(x) (0xa008 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C01(x) (0xa00c + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C02(x) (0xa010 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C03(x) (0xa014 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C04(x) (0xa018 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C10(x) (0xa01c + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C11(x) (0xa020 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C12(x) (0xa024 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C13(x) (0xa028 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C14(x) (0xa02c + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C20(x) (0xa030 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C21(x) (0xa034 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C22(x) (0xa038 + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C23(x) (0xa03c + ((x) * 0x3000)) ++#define CFC1_N_MA_CSC_COEFF_C24(x) (0xa040 + ((x) * 0x3000)) ++ ++/* 4 S2.22 multiplication factors, and 1 S9.15 addititive element for each of 3 ++ * output components ++ */ ++struct vc6_csc_coeff_entry { ++ u32 csc[3][5]; ++}; ++ ++static const struct vc6_csc_coeff_entry csc_coeffs[2][3] = { ++ [DRM_COLOR_YCBCR_LIMITED_RANGE] = { ++ [DRM_COLOR_YCBCR_BT601] = { ++ .csc = { ++ { 0x004A8542, 0x0, 0x0066254A, 0x0, 0xFF908A0D }, ++ { 0x004A8542, 0xFFE6ED5D, 0xFFCBF856, 0x0, 0x0043C9A3 }, ++ { 0x004A8542, 0x00811A54, 0x0, 0x0, 0xFF759502 } ++ } ++ }, ++ [DRM_COLOR_YCBCR_BT709] = { ++ .csc = { ++ { 0x004A8542, 0x0, 0x0072BC44, 0x0, 0xFF83F312 }, ++ { 0x004A8542, 0xFFF25A22, 0xFFDDE4D0, 0x0, 0x00267064 }, ++ { 0x004A8542, 0x00873197, 0x0, 0x0, 0xFF6F7DC0 } ++ } ++ }, ++ [DRM_COLOR_YCBCR_BT2020] = { ++ .csc = { ++ { 0x004A8542, 0x0, 0x006B4A17, 0x0, 0xFF8B653F }, ++ { 
0x004A8542, 0xFFF402D9, 0xFFDDE4D0, 0x0, 0x0024C7AE }, ++ { 0x004A8542, 0x008912CC, 0x0, 0x0, 0xFF6D9C8B } ++ } ++ } ++ }, ++ [DRM_COLOR_YCBCR_FULL_RANGE] = { ++ [DRM_COLOR_YCBCR_BT601] = { ++ .csc = { ++ { 0x00400000, 0x0, 0x0059BA5E, 0x0, 0xFFA645A1 }, ++ { 0x00400000, 0xFFE9F9AC, 0xFFD24B97, 0x0, 0x0043BABB }, ++ { 0x00400000, 0x00716872, 0x0, 0x0, 0xFF8E978D } ++ } ++ }, ++ [DRM_COLOR_YCBCR_BT709] = { ++ .csc = { ++ { 0x00400000, 0x0, 0x0064C985, 0x0, 0xFF9B367A }, ++ { 0x00400000, 0xFFF402E1, 0xFFE20A40, 0x0, 0x0029F2DE }, ++ { 0x00400000, 0x0076C226, 0x0, 0x0, 0xFF893DD9 } ++ } ++ }, ++ [DRM_COLOR_YCBCR_BT2020] = { ++ .csc = { ++ { 0x00400000, 0x0, 0x005E3F14, 0x0, 0xFFA1C0EB }, ++ { 0x00400000, 0xFFF577F6, 0xFFDB580F, 0x0, 0x002F2FFA }, ++ { 0x00400000, 0x007868DB, 0x0, 0x0, 0xFF879724 } ++ } ++ } ++ } ++}; ++ ++static int vc6_hvs_hw_init(struct vc4_hvs *hvs) ++{ ++ const struct vc6_csc_coeff_entry *coeffs; ++ unsigned int i; ++ ++ HVS_WRITE(SCALER6_CONTROL, ++ SCALER6_CONTROL_HVS_EN | ++ VC4_SET_FIELD(8, SCALER6_CONTROL_PF_LINES) | ++ VC4_SET_FIELD(15, SCALER6_CONTROL_MAX_REQS)); ++ ++ /* Set HVS arbiter priority to max */ ++ HVS_WRITE(SCALER6_PRI_MAP0, 0xffffffff); ++ HVS_WRITE(SCALER6_PRI_MAP1, 0xffffffff); ++ ++ for (i = 0; i < 6; i++) { ++ coeffs = &csc_coeffs[i / 3][i % 3]; ++ ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C00(i), coeffs->csc[0][0]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C01(i), coeffs->csc[0][1]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C02(i), coeffs->csc[0][2]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C03(i), coeffs->csc[0][3]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C04(i), coeffs->csc[0][4]); ++ ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C10(i), coeffs->csc[1][0]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C11(i), coeffs->csc[1][1]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C12(i), coeffs->csc[1][2]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C13(i), coeffs->csc[1][3]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C14(i), coeffs->csc[1][4]); ++ ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C20(i), coeffs->csc[2][0]); ++ 
HVS_WRITE(CFC1_N_MA_CSC_COEFF_C21(i), coeffs->csc[2][1]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C22(i), coeffs->csc[2][2]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C23(i), coeffs->csc[2][3]); ++ HVS_WRITE(CFC1_N_MA_CSC_COEFF_C24(i), coeffs->csc[2][4]); ++ ++ HVS_WRITE(CFC1_N_NL_CSC_CTRL(i), BIT(15)); ++ } ++ ++ return 0; ++} ++ + static int vc4_hvs_cob_init(struct vc4_hvs *hvs) + { + struct vc4_dev *vc4 = hvs->vc4; +- u32 reg, top; ++ u32 reg, top, base; + + /* + * Recompute Composite Output Buffer (COB) allocations for the +@@ -1452,6 +1900,31 @@ static int vc4_hvs_cob_init(struct vc4_h + HVS_WRITE(SCALER_DISPBASE0, reg); + break; + ++ case VC4_GEN_6: ++ #define VC6_COB_LINE_WIDTH 3840 ++ #define VC6_COB_NUM_LINES 4 ++ base = 0; ++ top = 3840; ++ ++ HVS_WRITE(SCALER6_DISP2_COB, ++ VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) | ++ VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE)); ++ ++ base = top + 16; ++ top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES; ++ ++ HVS_WRITE(SCALER6_DISP1_COB, ++ VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) | ++ VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE)); ++ ++ base = top + 16; ++ top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES; ++ ++ HVS_WRITE(SCALER6_DISP0_COB, ++ VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) | ++ VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE)); ++ break; ++ + default: + return -EINVAL; + } +@@ -1477,10 +1950,16 @@ static int vc4_hvs_bind(struct device *d + return PTR_ERR(hvs); + + hvs->regset.base = hvs->regs; +- hvs->regset.regs = vc4_hvs_regs; +- hvs->regset.nregs = ARRAY_SIZE(vc4_hvs_regs); + +- if (vc4->gen == VC4_GEN_5) { ++ if (vc4->gen >= VC4_GEN_6) { ++ hvs->regset.regs = vc6_hvs_regs; ++ hvs->regset.nregs = ARRAY_SIZE(vc6_hvs_regs); ++ } else { ++ hvs->regset.regs = vc4_hvs_regs; ++ hvs->regset.nregs = ARRAY_SIZE(vc4_hvs_regs); ++ } ++ ++ if (vc4->gen >= VC4_GEN_5) { + struct rpi_firmware *firmware; + struct device_node *node; + unsigned int max_rate; +@@ -1494,12 +1973,20 @@ static int vc4_hvs_bind(struct device *d + if (!firmware) + 
return -EPROBE_DEFER; + +- hvs->core_clk = devm_clk_get(&pdev->dev, NULL); ++ hvs->core_clk = devm_clk_get(&pdev->dev, ++ (vc4->gen >= VC4_GEN_6) ? "core" : NULL); + if (IS_ERR(hvs->core_clk)) { + dev_err(&pdev->dev, "Couldn't get core clock\n"); + return PTR_ERR(hvs->core_clk); + } + ++ hvs->disp_clk = devm_clk_get(&pdev->dev, ++ (vc4->gen >= VC4_GEN_6) ? "disp" : NULL); ++ if (IS_ERR(hvs->disp_clk)) { ++ dev_err(&pdev->dev, "Couldn't get disp clock\n"); ++ return PTR_ERR(hvs->disp_clk); ++ } ++ + max_rate = rpi_firmware_clk_get_max_rate(firmware, + RPI_FIRMWARE_CORE_CLK_ID); + rpi_firmware_put(firmware); +@@ -1516,14 +2003,51 @@ static int vc4_hvs_bind(struct device *d + dev_err(&pdev->dev, "Couldn't enable the core clock\n"); + return ret; + } ++ ++ ret = clk_prepare_enable(hvs->disp_clk); ++ if (ret) { ++ dev_err(&pdev->dev, "Couldn't enable the disp clock\n"); ++ return ret; ++ } + } + +- if (vc4->gen == VC4_GEN_4) +- hvs->dlist = hvs->regs + SCALER_DLIST_START; +- else ++ if (vc4->gen >= VC4_GEN_6) { ++ unsigned int i; ++ ++ for (i = 0; i < HVS_NUM_CHANNELS; i++) { ++ char irq_name[16]; ++ int irq; ++ ++ snprintf(irq_name, sizeof(irq_name), "ch%u-eof", i); ++ ++ irq = platform_get_irq_byname(pdev, irq_name); ++ if (irq < 0) { ++ dev_err(&pdev->dev, ++ "Couldn't get %s interrupt: %d\n", ++ irq_name, irq); ++ return irq; ++ } ++ ++ ret = devm_request_irq(&pdev->dev, ++ irq, ++ vc6_hvs_eof_irq_handler, ++ IRQF_NO_AUTOEN, ++ dev_name(&pdev->dev), ++ drm); ++ ++ hvs->eof_irq[i].desc = irq; ++ } ++ } ++ ++ if (vc4->gen >= VC4_GEN_5) + hvs->dlist = hvs->regs + SCALER5_DLIST_START; ++ else ++ hvs->dlist = hvs->regs + SCALER_DLIST_START; + +- ret = vc4_hvs_hw_init(hvs); ++ if (vc4->gen >= VC4_GEN_6) ++ ret = vc6_hvs_hw_init(hvs); ++ else ++ ret = vc4_hvs_hw_init(hvs); + if (ret) + return ret; + +@@ -1540,10 +2064,12 @@ static int vc4_hvs_bind(struct device *d + if (ret) + return ret; + +- ret = devm_request_irq(dev, platform_get_irq(pdev, 0), +- vc4_hvs_irq_handler, 
0, "vc4 hvs", drm); +- if (ret) +- return ret; ++ if (vc4->gen < VC4_GEN_6) { ++ ret = devm_request_irq(dev, platform_get_irq(pdev, 0), ++ vc4_hvs_irq_handler, 0, "vc4 hvs", drm); ++ if (ret) ++ return ret; ++ } + + return 0; + } +@@ -1568,6 +2094,7 @@ static void vc4_hvs_unbind(struct device + drm_mm_remove_node(node); + drm_mm_takedown(&vc4->hvs->lbm_mm); + ++ clk_disable_unprepare(hvs->disp_clk); + clk_disable_unprepare(hvs->core_clk); + + vc4->hvs = NULL; +@@ -1591,6 +2118,7 @@ static int vc4_hvs_dev_remove(struct pla + + static const struct of_device_id vc4_hvs_dt_match[] = { + { .compatible = "brcm,bcm2711-hvs" }, ++ { .compatible = "brcm,bcm2712-hvs" }, + { .compatible = "brcm,bcm2835-hvs" }, + {} + }; +--- a/drivers/gpu/drm/vc4/vc4_kms.c ++++ b/drivers/gpu/drm/vc4/vc4_kms.c +@@ -329,17 +329,59 @@ static void vc5_hvs_pv_muxing_commit(str + } + } + ++static void vc6_hvs_pv_muxing_commit(struct vc4_dev *vc4, ++ struct drm_atomic_state *state) ++{ ++ struct vc4_hvs *hvs = vc4->hvs; ++ struct drm_crtc_state *crtc_state; ++ struct drm_crtc *crtc; ++ unsigned int i; ++ ++ WARN_ON_ONCE(vc4->gen != VC4_GEN_6); ++ ++ for_each_new_crtc_in_state(state, crtc, crtc_state, i) { ++ struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state); ++ struct vc4_encoder *vc4_encoder; ++ struct drm_encoder *encoder; ++ unsigned char mux; ++ u32 reg; ++ ++ if (!vc4_state->update_muxing) ++ continue; ++ ++ if (vc4_state->assigned_channel != 1) ++ continue; ++ ++ encoder = vc4_get_crtc_encoder(crtc, crtc_state); ++ vc4_encoder = to_vc4_encoder(encoder); ++ switch (vc4_encoder->type) { ++ case VC4_ENCODER_TYPE_HDMI1: ++ mux = 0; ++ break; ++ ++ case VC4_ENCODER_TYPE_TXP: ++ mux = 2; ++ break; ++ ++ default: ++ continue; ++ } ++ ++ reg = HVS_READ(SCALER6_CONTROL); ++ HVS_WRITE(SCALER6_CONTROL, ++ (reg & ~SCALER6_CONTROL_DSP1_TARGET_MASK) | ++ VC4_SET_FIELD(mux, SCALER6_CONTROL_DSP1_TARGET)); ++ } ++} ++ + static void vc4_atomic_commit_tail(struct drm_atomic_state *state) + { + struct 
drm_device *dev = state->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hvs *hvs = vc4->hvs; +- struct drm_crtc_state *new_crtc_state; + struct vc4_hvs_state *new_hvs_state; +- struct drm_crtc *crtc; + struct vc4_hvs_state *old_hvs_state; + unsigned int channel; +- int i; + + old_hvs_state = vc4_hvs_get_old_global_state(state); + if (WARN_ON(IS_ERR(old_hvs_state))) +@@ -349,14 +391,23 @@ static void vc4_atomic_commit_tail(struc + if (WARN_ON(IS_ERR(new_hvs_state))) + return; + +- for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { +- struct vc4_crtc_state *vc4_crtc_state; ++ if (vc4->gen < VC4_GEN_6) { ++ struct drm_crtc_state *new_crtc_state; ++ struct drm_crtc *crtc; ++ int i; ++ ++ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { ++ struct vc4_crtc_state *vc4_crtc_state; + +- if (!new_crtc_state->commit || vc4->firmware_kms) +- continue; ++ if (vc4->firmware_kms) ++ continue; + +- vc4_crtc_state = to_vc4_crtc_state(new_crtc_state); +- vc4_hvs_mask_underrun(hvs, vc4_crtc_state->assigned_channel); ++ if (!new_crtc_state->commit) ++ continue; ++ ++ vc4_crtc_state = to_vc4_crtc_state(new_crtc_state); ++ vc4_hvs_mask_underrun(hvs, vc4_crtc_state->assigned_channel); ++ } + } + + for (channel = 0; channel < HVS_NUM_CHANNELS; channel++) { +@@ -378,7 +429,7 @@ static void vc4_atomic_commit_tail(struc + old_hvs_state->fifo_state[channel].pending_commit = NULL; + } + +- if (vc4->gen == VC4_GEN_5 && !vc4->firmware_kms) { ++ if (vc4->gen >= VC4_GEN_5 && !vc4->firmware_kms) { + unsigned long state_rate = max(old_hvs_state->core_clock_rate, + new_hvs_state->core_clock_rate); + unsigned long core_rate = clamp_t(unsigned long, state_rate, +@@ -391,17 +442,32 @@ static void vc4_atomic_commit_tail(struc + * modeset. 
+ */ + WARN_ON(clk_set_min_rate(hvs->core_clk, core_rate)); ++ WARN_ON(clk_set_min_rate(hvs->disp_clk, core_rate)); + } + + drm_atomic_helper_commit_modeset_disables(dev, state); + +- vc4_ctm_commit(vc4, state); ++ if (vc4->gen <= VC4_GEN_5) ++ vc4_ctm_commit(vc4, state); + + if (!vc4->firmware_kms) { +- if (vc4->gen == VC4_GEN_5) +- vc5_hvs_pv_muxing_commit(vc4, state); +- else ++ switch (vc4->gen) { ++ case VC4_GEN_4: + vc4_hvs_pv_muxing_commit(vc4, state); ++ break; ++ ++ case VC4_GEN_5: ++ vc5_hvs_pv_muxing_commit(vc4, state); ++ break; ++ ++ case VC4_GEN_6: ++ vc6_hvs_pv_muxing_commit(vc4, state); ++ break; ++ ++ default: ++ drm_err(dev, "Unknown VC4 generation: %d", vc4->gen); ++ break; ++ } + } + + drm_atomic_helper_commit_planes(dev, state, +@@ -417,7 +483,7 @@ static void vc4_atomic_commit_tail(struc + + drm_atomic_helper_cleanup_planes(dev, state); + +- if (vc4->gen == VC4_GEN_5 && !vc4->firmware_kms) { ++ if (vc4->gen >= VC4_GEN_5 && !vc4->firmware_kms) { + unsigned long core_rate = min_t(unsigned long, + hvs->max_core_rate, + new_hvs_state->core_clock_rate); +@@ -429,6 +495,7 @@ static void vc4_atomic_commit_tail(struc + * requirements. 
+ */ + WARN_ON(clk_set_min_rate(hvs->core_clk, core_rate)); ++ WARN_ON(clk_set_min_rate(hvs->disp_clk, core_rate)); + + drm_dbg(dev, "Core clock actual rate: %lu Hz\n", + clk_get_rate(hvs->core_clk)); +@@ -1081,7 +1148,10 @@ int vc4_kms_load(struct drm_device *dev) + return ret; + } + +- if (vc4->gen == VC4_GEN_5) { ++ if (vc4->gen >= VC4_GEN_6) { ++ dev->mode_config.max_width = 8192; ++ dev->mode_config.max_height = 8192; ++ } else if (vc4->gen >= VC4_GEN_5) { + dev->mode_config.max_width = 7680; + dev->mode_config.max_height = 7680; + } else { +--- a/drivers/gpu/drm/vc4/vc4_plane.c ++++ b/drivers/gpu/drm/vc4/vc4_plane.c +@@ -279,6 +279,7 @@ static bool plane_enabled(struct drm_pla + static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) + { + struct vc4_plane_state *vc4_state; ++ unsigned int i; + + if (WARN_ON(!plane->state)) + return NULL; +@@ -288,6 +289,11 @@ static struct drm_plane_state *vc4_plane + return NULL; + + memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); ++ memset(&vc4_state->upm, 0, sizeof(vc4_state->upm)); ++ ++ for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) ++ vc4_state->upm_handle[i] = 0; ++ + vc4_state->dlist_initialized = 0; + + __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); +@@ -310,14 +316,30 @@ static void vc4_plane_destroy_state(stru + struct drm_plane_state *state) + { + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); ++ struct vc4_hvs *hvs = vc4->hvs; + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); ++ unsigned int i; + + if (drm_mm_node_allocated(&vc4_state->lbm)) { + unsigned long irqflags; + +- spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); ++ spin_lock_irqsave(&hvs->mm_lock, irqflags); + drm_mm_remove_node(&vc4_state->lbm); +- spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); ++ spin_unlock_irqrestore(&hvs->mm_lock, irqflags); ++ } ++ ++ for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { ++ unsigned long irqflags; ++ ++ if (!drm_mm_node_allocated(&vc4_state->upm[i])) ++ 
continue;
++
++		spin_lock_irqsave(&hvs->mm_lock, irqflags);
++		drm_mm_remove_node(&vc4_state->upm[i]);
++		spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
++
++		if (vc4_state->upm_handle[i] > 0)
++			ida_free(&hvs->upm_handles, vc4_state->upm_handle[i]);
+ 	}
+ 
+ 	kfree(vc4_state->dlist);
+@@ -543,6 +565,11 @@ static void vc4_write_tpz(struct vc4_pla
+ 	recip = ~0 / scale;
+ 
+ 	vc4_dlist_write(vc4_state,
++			/*
++			 * The BCM2712 is lacking BIT(31) compared to
++			 * the previous generations, but we don't use
++			 * it.
++			 */
+ 			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
+ 			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
+ 	vc4_dlist_write(vc4_state,
+@@ -590,10 +617,15 @@ static void vc4_write_ppf(struct vc4_pla
+ 	vc4_dlist_write(vc4_state,
+ 			SCALER_PPF_AGC |
+ 			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
++			/*
++			 * The register layout documentation is slightly
++			 * different to setup the phase in the BCM2712,
++			 * but they seem equivalent.
++			 */
+ 			VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
+ }
+ 
+-static u32 vc4_lbm_size(struct drm_plane_state *state)
++static u32 __vc4_lbm_size(struct drm_plane_state *state)
+ {
+ 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+ 	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
+@@ -641,6 +673,166 @@ static u32 vc4_lbm_size(struct drm_plane
+ 	return lbm;
+ }
+ 
++/* Words per component implied by the vertical scaling mode of @channel. */
++static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state,
++						unsigned int channel)
++{
++	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++
++	switch (vc4_state->y_scaling[channel]) {
++	case VC4_SCALING_PPF:
++		return 4;
++
++	case VC4_SCALING_TPZ:
++		return 2;
++
++	default:
++		return 0;
++	}
++}
++
++/*
++ * Component count used to size the LBM for @channel: 0 when the channel has
++ * no vertical scaling, 1 (channel 0) or 2 (any other channel) for YUV
++ * formats, 4 for formats with alpha and 3 for the remaining ones.
++ */
++static unsigned int vc4_lbm_components(const struct drm_plane_state *state,
++				       unsigned int channel)
++{
++	const struct drm_format_info *info = state->fb->format;
++	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++
++	if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE)
++		return 0;
++
++	if (info->is_yuv)
++		return channel ? 2 : 1;
++
++	if (info->has_alpha)
++		return 4;
++
++	return 3;
++}
++
++/*
++ * LBM size needed by scaler channel @channel of @state on the HVS6, or 0 if
++ * that channel does no vertical scaling.
++ */
++static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state,
++					 unsigned int channel)
++{
++	const struct drm_format_info *info = state->fb->format;
++	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++	unsigned int channels_scaled = 0;
++	unsigned int components, words, wpc;
++	unsigned int width, lines;
++	unsigned int i;
++
++	/* LBM is meant to use the smaller of source or dest width, but there
++	 * is an issue with UV scaling that the size required for the second
++	 * channel is based on the source width only.
++	 */
++	if (info->hsub > 1 && channel == 1)
++		width = state->src_w >> 16;
++	else
++		width = min(state->src_w >> 16, state->crtc_w);
++	width = round_up(width / info->hsub, 4);
++
++	wpc = vc4_lbm_words_per_component(state, channel);
++	if (!wpc)
++		return 0;
++
++	components = vc4_lbm_components(state, channel);
++	if (!components)
++		return 0;
++
++	/*
++	 * The component taken away is presumably the per-pixel alpha one, so
++	 * only do it for formats that have it. Unconditionally, formats with
++	 * no alpha would lose a colour component and the luma channel of a
++	 * YUV format would get none at all, i.e. a zero LBM size.
++	 */
++	if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha)
++		components -= 1;
++
++	words = width * wpc * components;
++
++	lines = DIV_ROUND_UP(words, 128 / info->hsub);
++
++	/*
++	 * NOTE(review): i is not used in the loop body, so
++	 * y_scaling[channel] is counted twice and channels_scaled is never
++	 * 1. Indexing with i looks like the intent, but it would change the
++	 * allocated size: confirm against the hardware before fixing.
++	 */
++	for (i = 0; i < 2; i++)
++		if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE)
++			channels_scaled++;
++
++	if (channels_scaled == 1)
++		lines = lines / 2;
++
++	return lines;
++}
++
++/*
++ * LBM size for the HVS6: the larger of both channels when the format is
++ * horizontally subsampled, the size of channel 0 otherwise.
++ */
++static unsigned int __vc6_lbm_size(const struct drm_plane_state *state)
++{
++	const struct drm_format_info *info = state->fb->format;
++
++	if (info->hsub > 1)
++		return max(vc4_lbm_channel_size(state, 0),
++			   vc4_lbm_channel_size(state, 1));
++	else
++		return vc4_lbm_channel_size(state, 0);
++}
++
++/*
++ * LBM size needed by @state, or 0 if neither channel scales vertically. The
++ * HVS6 computation is used from VC4_GEN_6 onwards.
++ */
++u32 vc4_lbm_size(struct drm_plane_state *state)
++{
++	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
++
++	/* LBM is not needed when there's no vertical scaling. */
++	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
++	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
++		return 0;
++
++	if (vc4->gen >= VC4_GEN_6)
++		return __vc6_lbm_size(state);
++	else
++		return __vc4_lbm_size(state);
++}
++
++/*
++ * Size of the UPM buffer for framebuffer plane @plane of @state, rounded up
++ * to HVS_UBM_WORD_SIZE. It is derived from the plane pitch and from the
++ * number of buffered lines selected by vc4_state->upm_buffer_lines.
++ */
++static size_t vc6_upm_size(const struct drm_plane_state *state,
++			   unsigned int plane)
++{
++	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++	unsigned int stride = state->fb->pitches[plane];
++
++	/*
++	 * TODO: This only works for raster formats, and is sub-optimal
++	 * for buffers with a stride aligned on 32 bytes.
++	 */
++	unsigned int words_per_line = (stride + 62) / 32;
++	unsigned int fetch_region_size = words_per_line * 32;
++	unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines;
++	unsigned int buffer_size = fetch_region_size * buffer_lines;
++
++	return ALIGN(buffer_size, HVS_UBM_WORD_SIZE);
++}
++
+ static void vc4_write_scaling_parameters(struct drm_plane_state *state,
+ 					 int channel)
+ {
+@@ -744,6 +901,10 @@ static int vc4_plane_allocate_lbm(struct
+ 	if (!lbm_size)
+ 		return 0;
+ 
++	/*
++	 * NOTE: BCM2712 doesn't need to be aligned, since the size
++	 * returned by vc4_lbm_size() is in words already.
++	 */
+ 	if (vc4->gen == VC4_GEN_5)
+ 		lbm_size = ALIGN(lbm_size, 64);
+ 	else if (vc4->gen == VC4_GEN_4)
+@@ -781,6 +942,73 @@ static int vc4_plane_allocate_lbm(struct
+ 	return 0;
+ }
+ 
++/*
++ * Reserve UPM space and a UPM handle for every framebuffer plane of @state,
++ * and store both in the matching Pointer Word 0 of the dlist.
++ *
++ * The plane check may run again on a state that was already checked (see
++ * the dlist_initialized flag), so anything obtained by an earlier call is
++ * kept and reused. Everything is released by vc4_plane_destroy_state().
++ *
++ * Returns 0 on success, a negative error code otherwise.
++ */
++static int vc6_plane_allocate_upm(struct drm_plane_state *state)
++{
++	const struct drm_format_info *info = state->fb->format;
++	struct drm_device *drm = state->plane->dev;
++	struct vc4_dev *vc4 = to_vc4_dev(drm);
++	struct vc4_hvs *hvs = vc4->hvs;
++	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
++	unsigned int i;
++	int ret;
++
++	WARN_ON_ONCE(vc4->gen < VC4_GEN_6);
++
++	vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;
++
++	for (i = 0; i < info->num_planes; i++) {
++		unsigned long irqflags;
++		size_t upm_size;
++
++		upm_size = vc6_upm_size(state, i);
++		if (!upm_size)
++			return -EINVAL;
++
++		/* Never insert a node that is already allocated. */
++		if (!drm_mm_node_allocated(&vc4_state->upm[i])) {
++			spin_lock_irqsave(&hvs->mm_lock, irqflags);
++			ret = drm_mm_insert_node_generic(&hvs->upm_mm,
++							 &vc4_state->upm[i],
++							 upm_size, HVS_UBM_WORD_SIZE,
++							 0, 0);
++			spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
++			if (ret) {
++				drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
++				return ret;
++			}
++		}
++
++		/* Handles start at 1: 0 means that none was allocated yet. */
++		if (!vc4_state->upm_handle[i]) {
++			ret = ida_alloc_range(&hvs->upm_handles, 1, 32, GFP_KERNEL);
++			if (ret < 0)
++				return ret;
++
++			vc4_state->upm_handle[i] = ret;
++		}
++
++		vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
++			VC4_SET_FIELD(vc4_state->upm[i].start / HVS_UBM_WORD_SIZE,
++				      SCALER6_PTR0_UPM_BASE) |
++			VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
++				      SCALER6_PTR0_UPM_HANDLE) |
++			VC4_SET_FIELD(vc4_state->upm_buffer_lines,
++				      SCALER6_PTR0_UPM_BUFF_SIZE);
++	}
++
++	return 0;
++}
++
+ /*
+  * The colorspace conversion matrices are held in 3 entries in the dlist.
+ * Create an array of them, with entries for each full and limited mode, and +@@ -1355,6 +1567,413 @@ static int vc4_plane_mode_set(struct drm + return 0; + } + ++static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state) ++{ ++ struct drm_plane_state *state = &vc4_state->base; ++ u32 ret = 0; ++ ++ if (vc4_state->is_yuv) { ++ enum drm_color_encoding color_encoding = state->color_encoding; ++ enum drm_color_range color_range = state->color_range; ++ ++ ret |= SCALER6_CTL2_CSC_ENABLE; ++ ++ /* CSC pre-loaded with: ++ * 0 = BT601 limited range ++ * 1 = BT709 limited range ++ * 2 = BT2020 limited range ++ * 3 = BT601 full range ++ * 4 = BT709 full range ++ * 5 = BT2020 full range ++ */ ++ if (color_encoding > DRM_COLOR_YCBCR_BT2020) ++ color_encoding = DRM_COLOR_YCBCR_BT601; ++ if (color_range > DRM_COLOR_YCBCR_FULL_RANGE) ++ color_range = DRM_COLOR_YCBCR_LIMITED_RANGE; ++ ++ ret |= VC4_SET_FIELD(color_encoding + (color_range * 3), ++ SCALER6_CTL2_BRCM_CFC_CONTROL); ++ } ++ ++ return ret; ++} ++ ++static int vc6_plane_mode_set(struct drm_plane *plane, ++ struct drm_plane_state *state) ++{ ++ struct drm_device *drm = plane->dev; ++ struct vc4_dev *vc4 = to_vc4_dev(drm); ++ struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); ++ struct drm_framebuffer *fb = state->fb; ++ const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); ++ u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); ++ int num_planes = fb->format->num_planes; ++ u32 h_subsample = fb->format->hsub; ++ u32 v_subsample = fb->format->vsub; ++ bool mix_plane_alpha; ++ bool covers_screen; ++ u32 scl0, scl1, pitch0; ++ u32 tiling, src_x, src_y; ++ u32 width, height; ++ u32 hvs_format = format->hvs; ++ u32 offsets[3] = { 0 }; ++ unsigned int rotation; ++ int ret, i; ++ ++ if (vc4_state->dlist_initialized) ++ return 0; ++ ++ ret = vc4_plane_setup_clipping_and_scaling(state); ++ if (ret) ++ return ret; ++ ++ width = vc4_state->src_w[0] >> 16; ++ height = 
vc4_state->src_h[0] >> 16; ++ ++ /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB ++ * and 4:4:4, scl1 should be set to scl0 so both channels of ++ * the scaler do the same thing. For YUV, the Y plane needs ++ * to be put in channel 1 and Cb/Cr in channel 0, so we swap ++ * the scl fields here. ++ */ ++ if (num_planes == 1) { ++ scl0 = vc4_get_scl_field(state, 0); ++ scl1 = scl0; ++ } else { ++ scl0 = vc4_get_scl_field(state, 1); ++ scl1 = vc4_get_scl_field(state, 0); ++ } ++ ++ rotation = drm_rotation_simplify(state->rotation, ++ DRM_MODE_ROTATE_0 | ++ DRM_MODE_REFLECT_X | ++ DRM_MODE_REFLECT_Y); ++ ++ /* We must point to the last line when Y reflection is enabled. */ ++ src_y = vc4_state->src_y >> 16; ++ if (rotation & DRM_MODE_REFLECT_Y) ++ src_y += height - 1; ++ ++ src_x = vc4_state->src_x >> 16; ++ ++ switch (base_format_mod) { ++ case DRM_FORMAT_MOD_LINEAR: ++ tiling = SCALER6_CTL0_ADDR_MODE_LINEAR; ++ ++ /* Adjust the base pointer to the first pixel to be scanned ++ * out. ++ */ ++ for (i = 0; i < num_planes; i++) { ++ offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i]; ++ offsets[i] += src_x / (i ? 
h_subsample : 1) * fb->format->cpp[i]; ++ } ++ ++ break; ++ ++ case DRM_FORMAT_MOD_BROADCOM_SAND128: ++ case DRM_FORMAT_MOD_BROADCOM_SAND256: { ++ uint32_t param = fourcc_mod_broadcom_param(fb->modifier); ++ u32 components_per_word; ++ u32 starting_offset; ++ u32 fetch_count; ++ ++ if (param > SCALER_TILE_HEIGHT_MASK) { ++ DRM_DEBUG_KMS("SAND height too large (%d)\n", ++ param); ++ return -EINVAL; ++ } ++ ++ if (fb->format->format == DRM_FORMAT_P030) { ++ hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; ++ tiling = SCALER6_CTL0_ADDR_MODE_128B; ++ } else { ++ hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE; ++ ++ switch (base_format_mod) { ++ case DRM_FORMAT_MOD_BROADCOM_SAND128: ++ tiling = SCALER6_CTL0_ADDR_MODE_128B; ++ break; ++ case DRM_FORMAT_MOD_BROADCOM_SAND256: ++ tiling = SCALER6_CTL0_ADDR_MODE_256B; ++ break; ++ default: ++ return -EINVAL; ++ } ++ } ++ ++ /* Adjust the base pointer to the first pixel to be scanned ++ * out. ++ * ++ * For P030, y_ptr [31:4] is the 128bit word for the start pixel ++ * y_ptr [3:0] is the pixel (0-11) contained within that 128bit ++ * word that should be taken as the first pixel. ++ * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the ++ * element within the 128bit word, eg for pixel 3 the value ++ * should be 6. 
++ */ ++ for (i = 0; i < num_planes; i++) { ++ u32 tile_w, tile, x_off, pix_per_tile; ++ ++ if (fb->format->format == DRM_FORMAT_P030) { ++ /* ++ * Spec says: bits [31:4] of the given address ++ * should point to the 128-bit word containing ++ * the desired starting pixel, and bits[3:0] ++ * should be between 0 and 11, indicating which ++ * of the 12-pixels in that 128-bit word is the ++ * first pixel to be used ++ */ ++ u32 remaining_pixels = src_x % 96; ++ u32 aligned = remaining_pixels / 12; ++ u32 last_bits = remaining_pixels % 12; ++ ++ x_off = aligned * 16 + last_bits; ++ tile_w = 128; ++ pix_per_tile = 96; ++ } else { ++ switch (base_format_mod) { ++ case DRM_FORMAT_MOD_BROADCOM_SAND128: ++ tile_w = 128; ++ break; ++ case DRM_FORMAT_MOD_BROADCOM_SAND256: ++ tile_w = 256; ++ break; ++ default: ++ return -EINVAL; ++ } ++ pix_per_tile = tile_w / fb->format->cpp[0]; ++ x_off = (src_x % pix_per_tile) / ++ (i ? h_subsample : 1) * ++ fb->format->cpp[i]; ++ } ++ ++ tile = src_x / pix_per_tile; ++ ++ offsets[i] += param * tile_w * tile; ++ offsets[i] += src_y / (i ? v_subsample : 1) * tile_w; ++ offsets[i] += x_off & ~(i ? 1 : 0); ++ } ++ ++ components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32; ++ starting_offset = src_x % components_per_word; ++ fetch_count = (width + starting_offset + components_per_word - 1) / ++ components_per_word; ++ ++ pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) | ++ VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT); ++ break; ++ } ++ ++ default: ++ DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", ++ (long long)fb->modifier); ++ return -EINVAL; ++ } ++ ++ /* fetch an extra pixel if we don't actually line up with the left edge. 
*/ ++ if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16)) ++ width++; ++ ++ /* same for the right side */ ++ if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) && ++ vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16)) ++ width++; ++ ++ /* now for the top */ ++ if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16)) ++ height++; ++ ++ /* and the bottom */ ++ if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) && ++ vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16)) ++ height++; ++ ++ /* for YUV444 hardware wants double the width, otherwise it doesn't ++ * fetch full width of chroma ++ */ ++ if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444) ++ width <<= 1; ++ ++ /* Don't waste cycles mixing with plane alpha if the set alpha ++ * is opaque or there is no per-pixel alpha information. ++ * In any case we use the alpha property value as the fixed alpha. ++ */ ++ mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && ++ fb->format->has_alpha; ++ ++ /* Control Word 0: Scaling Configuration & Element Validity*/ ++ vc4_dlist_write(vc4_state, ++ SCALER6_CTL0_VALID | ++ VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) | ++ VC4_SET_FIELD(0, SCALER6_CTL0_ALPHA_MASK) | ++ (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) | ++ VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) | ++ VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) | ++ VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) | ++ VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT)); ++ ++ /* Position Word 0: Image Position */ ++ vc4_state->pos0_offset = vc4_state->dlist_count; ++ vc4_dlist_write(vc4_state, ++ VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) | ++ (rotation & DRM_MODE_REFLECT_X ? 
SCALER6_POS0_HFLIP : 0) | ++ VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X)); ++ ++ /* Control Word 2: Alpha Value & CSC */ ++ vc4_dlist_write(vc4_state, ++ vc6_plane_get_csc_mode(vc4_state) | ++ vc4_hvs5_get_alpha_blend_mode(state) | ++ (mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) | ++ VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA)); ++ ++ /* Position Word 1: Scaled Image Dimensions */ ++ if (!vc4_state->is_unity) ++ vc4_dlist_write(vc4_state, ++ VC4_SET_FIELD(vc4_state->crtc_h - 1, ++ SCALER6_POS1_SCL_LINES) | ++ VC4_SET_FIELD(vc4_state->crtc_w - 1, ++ SCALER6_POS1_SCL_WIDTH)); ++ ++ /* Position Word 2: Source Image Size */ ++ vc4_state->pos2_offset = vc4_state->dlist_count; ++ vc4_dlist_write(vc4_state, ++ VC4_SET_FIELD(height - 1, ++ SCALER6_POS2_SRC_LINES) | ++ VC4_SET_FIELD(width - 1, ++ SCALER6_POS2_SRC_WIDTH)); ++ ++ /* Position Word 3: Context */ ++ vc4_dlist_write(vc4_state, 0xc0c0c0c0); ++ ++ /* ++ * TODO: This only covers Raster Scan Order planes ++ */ ++ for (i = 0; i < num_planes; i++) { ++ dma_addr_t paddr = drm_fb_dma_get_gem_addr(fb, state, i); ++ ++ paddr += offsets[i]; ++ ++ /* Pointer Word 0 */ ++ vc4_state->ptr0_offset[i] = vc4_state->dlist_count; ++ vc4_dlist_write(vc4_state, ++ (rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) | ++ /* ++ * The UPM buffer will be allocated in ++ * vc6_plane_allocate_upm(). 
++ */ ++ VC4_SET_FIELD(upper_32_bits(paddr) & 0xf, ++ SCALER6_PTR0_UPPER_ADDR)); ++ ++ /* Pointer Word 1 */ ++ vc4_dlist_write(vc4_state, lower_32_bits(paddr)); ++ ++ /* Pointer Word 2 */ ++ if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 && ++ base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) { ++ vc4_dlist_write(vc4_state, ++ VC4_SET_FIELD(fb->pitches[i], ++ SCALER6_PTR2_PITCH)); ++ } else { ++ vc4_dlist_write(vc4_state, pitch0); ++ } ++ } ++ ++ /* ++ * Palette Word 0 ++ * TODO: We're not using the palette mode ++ */ ++ ++ /* ++ * Trans Word 0 ++ * TODO: It's only relevant if we set the trans_rgb bit in the ++ * control word 0, and we don't at the moment. ++ */ ++ ++ vc4_state->lbm_offset = 0; ++ ++ if (!vc4_state->is_unity || fb->format->is_yuv) { ++ /* ++ * Reserve a slot for the LBM Base Address. The real value will ++ * be set when calling vc4_plane_allocate_lbm(). ++ */ ++ if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || ++ vc4_state->y_scaling[1] != VC4_SCALING_NONE) { ++ vc4_state->lbm_offset = vc4_state->dlist_count; ++ vc4_dlist_counter_increment(vc4_state); ++ } ++ ++ if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || ++ vc4_state->x_scaling[1] != VC4_SCALING_NONE || ++ vc4_state->y_scaling[0] != VC4_SCALING_NONE || ++ vc4_state->y_scaling[1] != VC4_SCALING_NONE) { ++ if (num_planes > 1) ++ /* ++ * Emit Cb/Cr as channel 0 and Y as channel ++ * 1. This matches how we set up scl0/scl1 ++ * above. ++ */ ++ vc4_write_scaling_parameters(state, 1); ++ ++ vc4_write_scaling_parameters(state, 0); ++ } ++ ++ /* ++ * If any PPF setup was done, then all the kernel ++ * pointers get uploaded. 
++ */ ++ if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || ++ vc4_state->y_scaling[0] == VC4_SCALING_PPF || ++ vc4_state->x_scaling[1] == VC4_SCALING_PPF || ++ vc4_state->y_scaling[1] == VC4_SCALING_PPF) { ++ u32 kernel = ++ VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, ++ SCALER_PPF_KERNEL_OFFSET); ++ ++ /* HPPF plane 0 */ ++ vc4_dlist_write(vc4_state, kernel); ++ /* VPPF plane 0 */ ++ vc4_dlist_write(vc4_state, kernel); ++ /* HPPF plane 1 */ ++ vc4_dlist_write(vc4_state, kernel); ++ /* VPPF plane 1 */ ++ vc4_dlist_write(vc4_state, kernel); ++ } ++ } ++ ++ vc4_dlist_write(vc4_state, SCALER6_CTL0_END); ++ ++ vc4_state->dlist[0] |= ++ VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT); ++ ++ /* crtc_* are already clipped coordinates. */ ++ covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 && ++ vc4_state->crtc_w == state->crtc->mode.hdisplay && ++ vc4_state->crtc_h == state->crtc->mode.vdisplay; ++ ++ /* ++ * Background fill might be necessary when the plane has per-pixel ++ * alpha content or a non-opaque plane alpha and could blend from the ++ * background or does not cover the entire screen. ++ */ ++ vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || ++ state->alpha != DRM_BLEND_ALPHA_OPAQUE; ++ ++ /* ++ * Flag the dlist as initialized to avoid checking it twice in case ++ * the async update check already called vc4_plane_mode_set() and ++ * decided to fallback to sync update because async update was not ++ * possible. ++ */ ++ vc4_state->dlist_initialized = 1; ++ ++ vc4_plane_calc_load(state); ++ ++ drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n", ++ plane->base.id, plane->name, vc4_state->dlist_count); ++ ++ return 0; ++} ++ + /* If a modeset involves changing the setup of a plane, the atomic + * infrastructure will call this to validate a proposed plane setup. 
+ * However, if a plane isn't getting updated, this (and the +@@ -1365,6 +1984,7 @@ static int vc4_plane_mode_set(struct drm + static int vc4_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) + { ++ struct vc4_dev *vc4 = to_vc4_dev(plane->dev); + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state); +@@ -1375,7 +1995,10 @@ static int vc4_plane_atomic_check(struct + if (!plane_enabled(new_plane_state)) + return 0; + +- ret = vc4_plane_mode_set(plane, new_plane_state); ++ if (vc4->gen >= VC4_GEN_6) ++ ret = vc6_plane_mode_set(plane, new_plane_state); ++ else ++ ret = vc4_plane_mode_set(plane, new_plane_state); + if (ret) + return ret; + +@@ -1383,6 +2006,12 @@ static int vc4_plane_atomic_check(struct + if (ret) + return ret; + ++ if (vc4->gen >= VC4_GEN_6) { ++ ret = vc6_plane_allocate_upm(new_plane_state); ++ if (ret) ++ return ret; ++ } ++ + return 0; + } + +@@ -1716,7 +2345,7 @@ struct drm_plane *vc4_plane_init(struct + }; + + for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { +- if (!hvs_formats[i].hvs5_only || vc4->gen == VC4_GEN_5) { ++ if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) { + formats[num_formats] = hvs_formats[i].drm; + num_formats++; + } +@@ -1731,7 +2360,7 @@ struct drm_plane *vc4_plane_init(struct + return ERR_CAST(vc4_plane); + plane = &vc4_plane->base; + +- if (vc4->gen == VC4_GEN_5) ++ if (vc4->gen >= VC4_GEN_5) + drm_plane_helper_add(plane, &vc5_plane_helper_funcs); + else + drm_plane_helper_add(plane, &vc4_plane_helper_funcs); +--- a/drivers/gpu/drm/vc4/vc4_regs.h ++++ b/drivers/gpu/drm/vc4/vc4_regs.h +@@ -536,6 +536,130 @@ + + #define SCALER5_DLIST_START 0x00004000 + ++#define SCALER6_VERSION 0x00000000 ++#define SCALER6_CXM_SIZE 0x00000004 ++#define SCALER6_LBM_SIZE 0x00000008 ++#define SCALER6_UBM_SIZE 0x0000000c ++#define SCALER6_COBA_SIZE 0x00000010 ++#define SCALER6_COB_SIZE 0x00000014 ++ 
++#define SCALER6_CONTROL 0x00000020 ++# define SCALER6_CONTROL_HVS_EN BIT(31) ++# define SCALER6_CONTROL_PF_LINES_MASK VC4_MASK(22, 18) ++# define SCALER6_CONTROL_ABORT_ON_EMPTY BIT(16) ++# define SCALER6_CONTROL_DSP1_TARGET_MASK VC4_MASK(13, 12) ++# define SCALER6_CONTROL_MAX_REQS_MASK VC4_MASK(7, 4) ++ ++#define SCALER6_FETCHER_STATUS 0x00000024 ++#define SCALER6_FETCH_STATUS 0x00000028 ++#define SCALER6_HANDLE_ERROR 0x0000002c ++ ++#define SCALER6_DISP0_CTRL0 0x00000030 ++#define SCALER6_DISPX_CTRL0(x) \ ++ (SCALER6_DISP0_CTRL0 + ((x) * (SCALER6_DISP1_CTRL0 - SCALER6_DISP0_CTRL0))) ++# define SCALER6_DISPX_CTRL0_ENB BIT(31) ++# define SCALER6_DISPX_CTRL0_RESET BIT(30) ++# define SCALER6_DISPX_CTRL0_FWIDTH_MASK VC4_MASK(28, 16) ++# define SCALER6_DISPX_CTRL0_ONESHOT BIT(15) ++# define SCALER6_DISPX_CTRL0_ONECTX_MASK VC4_MASK(14, 13) ++# define SCALER6_DISPX_CTRL0_LINES_MASK VC4_MASK(12, 0) ++ ++#define SCALER6_DISP0_CTRL1 0x00000034 ++#define SCALER6_DISPX_CTRL1(x) \ ++ (SCALER6_DISP0_CTRL1 + ((x) * (SCALER6_DISP1_CTRL1 - SCALER6_DISP0_CTRL1))) ++# define SCALER6_DISPX_CTRL1_BGENB BIT(8) ++# define SCALER6_DISPX_CTRL1_INTLACE BIT(0) ++ ++#define SCALER6_DISP0_BGND 0x00000038 ++#define SCALER6_DISPX_BGND(x) \ ++ (SCALER6_DISP0_BGND + ((x) * (SCALER6_DISP1_BGND - SCALER6_DISP0_BGND))) ++ ++#define SCALER6_DISP0_LPTRS 0x0000003c ++#define SCALER6_DISPX_LPTRS(x) \ ++ (SCALER6_DISP0_LPTRS + ((x) * (SCALER6_DISP1_LPTRS - SCALER6_DISP0_LPTRS))) ++# define SCALER6_DISPX_LPTRS_HEADE_MASK VC4_MASK(11, 0) ++ ++#define SCALER6_DISP0_COB 0x00000040 ++#define SCALER6_DISPX_COB(x) \ ++ (SCALER6_DISP0_COB + ((x) * (SCALER6_DISP1_COB - SCALER6_DISP0_COB))) ++# define SCALER6_DISPX_COB_TOP_MASK VC4_MASK(31, 16) ++# define SCALER6_DISPX_COB_BASE_MASK VC4_MASK(15, 0) ++ ++#define SCALER6_DISP0_STATUS 0x00000044 ++ ++#define SCALER6_DISPX_STATUS(x) \ ++ (SCALER6_DISP0_STATUS + ((x) * (SCALER6_DISP1_STATUS - SCALER6_DISP0_STATUS))) ++# define SCALER6_DISPX_STATUS_EMPTY BIT(22) ++# 
define SCALER6_DISPX_STATUS_FRCNT_MASK VC4_MASK(21, 16) ++# define SCALER6_DISPX_STATUS_OFIELD BIT(15) ++# define SCALER6_DISPX_STATUS_MODE_MASK VC4_MASK(14, 13) ++# define SCALER6_DISPX_STATUS_MODE_DISABLED 0 ++# define SCALER6_DISPX_STATUS_MODE_INIT 1 ++# define SCALER6_DISPX_STATUS_MODE_RUN 2 ++# define SCALER6_DISPX_STATUS_MODE_EOF 3 ++# define SCALER6_DISPX_STATUS_YLINE_MASK VC4_MASK(12, 0) ++ ++#define SCALER6_DISP0_DL 0x00000048 ++ ++#define SCALER6_DISPX_DL(x) \ ++ (SCALER6_DISP0_DL + ((x) * (SCALER6_DISP1_DL - SCALER6_DISP0_DL))) ++# define SCALER6_DISPX_DL_LACT_MASK VC4_MASK(11, 0) ++ ++#define SCALER6_DISP0_RUN 0x0000004c ++#define SCALER6_DISP1_CTRL0 0x00000050 ++#define SCALER6_DISP1_CTRL1 0x00000054 ++#define SCALER6_DISP1_BGND 0x00000058 ++#define SCALER6_DISP1_LPTRS 0x0000005c ++#define SCALER6_DISP1_COB 0x00000060 ++#define SCALER6_DISP1_STATUS 0x00000064 ++#define SCALER6_DISP1_DL 0x00000068 ++#define SCALER6_DISP1_RUN 0x0000006c ++#define SCALER6_DISP2_CTRL0 0x00000070 ++#define SCALER6_DISP2_CTRL1 0x00000074 ++#define SCALER6_DISP2_BGND 0x00000078 ++#define SCALER6_DISP2_LPTRS 0x0000007c ++#define SCALER6_DISP2_COB 0x00000080 ++#define SCALER6_DISP2_STATUS 0x00000084 ++#define SCALER6_DISP2_DL 0x00000088 ++#define SCALER6_DISP2_RUN 0x0000008c ++#define SCALER6_EOLN 0x00000090 ++#define SCALER6_DL_STATUS 0x00000094 ++#define SCALER6_BFG_MISC 0x0000009c ++#define SCALER6_QOS0 0x000000a0 ++#define SCALER6_PROF0 0x000000a4 ++#define SCALER6_QOS1 0x000000a8 ++#define SCALER6_PROF1 0x000000ac ++#define SCALER6_QOS2 0x000000b0 ++#define SCALER6_PROF2 0x000000b4 ++#define SCALER6_PRI_MAP0 0x000000b8 ++#define SCALER6_PRI_MAP1 0x000000bc ++#define SCALER6_HISTCTRL 0x000000c0 ++#define SCALER6_HISTBIN0 0x000000c4 ++#define SCALER6_HISTBIN1 0x000000c8 ++#define SCALER6_HISTBIN2 0x000000cc ++#define SCALER6_HISTBIN3 0x000000d0 ++#define SCALER6_HISTBIN4 0x000000d4 ++#define SCALER6_HISTBIN5 0x000000d8 ++#define SCALER6_HISTBIN6 0x000000dc ++#define 
SCALER6_HISTBIN7 0x000000e0 ++#define SCALER6_HDR_CFG_REMAP 0x000000f4 ++#define SCALER6_COL_SPACE 0x000000f8 ++#define SCALER6_HVS_ID 0x000000fc ++#define SCALER6_CFC1 0x00000100 ++#define SCALER6_DISP_UPM_ISO0 0x00000200 ++#define SCALER6_DISP_UPM_ISO1 0x00000204 ++#define SCALER6_DISP_UPM_ISO2 0x00000208 ++#define SCALER6_DISP_LBM_ISO0 0x0000020c ++#define SCALER6_DISP_LBM_ISO1 0x00000210 ++#define SCALER6_DISP_LBM_ISO2 0x00000214 ++#define SCALER6_DISP_COB_ISO0 0x00000218 ++#define SCALER6_DISP_COB_ISO1 0x0000021c ++#define SCALER6_DISP_COB_ISO2 0x00000220 ++#define SCALER6_BAD_COB 0x00000224 ++#define SCALER6_BAD_LBM 0x00000228 ++#define SCALER6_BAD_UPM 0x0000022c ++#define SCALER6_BAD_AXI 0x00000230 ++ + # define VC4_HDMI_SW_RESET_FORMAT_DETECT BIT(1) + # define VC4_HDMI_SW_RESET_HDMI BIT(0) + +@@ -1131,4 +1255,61 @@ enum hvs_pixel_format { + #define SCALER_PITCH0_TILE_WIDTH_R_MASK VC4_MASK(6, 0) + #define SCALER_PITCH0_TILE_WIDTH_R_SHIFT 0 + ++#define SCALER6_CTL0_END BIT(31) ++#define SCALER6_CTL0_VALID BIT(30) ++#define SCALER6_CTL0_NEXT_MASK VC4_MASK(29, 24) ++#define SCALER6_CTL0_RGB_TRANS BIT(23) ++#define SCALER6_CTL0_ADDR_MODE_MASK VC4_MASK(22, 20) ++#define SCALER6_CTL0_ADDR_MODE_LINEAR 0 ++#define SCALER6_CTL0_ADDR_MODE_128B 1 ++#define SCALER6_CTL0_ADDR_MODE_256B 2 ++#define SCALER6_CTL0_ADDR_MODE_MAP8 3 ++#define SCALER6_CTL0_ADDR_MODE_UIF 4 ++ ++#define SCALER6_CTL0_ALPHA_MASK_MASK VC4_MASK(19, 18) ++#define SCALER6_CTL0_UNITY BIT(15) ++#define SCALER6_CTL0_ORDERRGBA_MASK VC4_MASK(14, 13) ++#define SCALER6_CTL0_SCL1_MODE_MASK VC4_MASK(10, 8) ++#define SCALER6_CTL0_SCL0_MODE_MASK VC4_MASK(7, 5) ++#define SCALER6_CTL0_PIXEL_FORMAT_MASK VC4_MASK(4, 0) ++ ++#define SCALER6_POS0_START_Y_MASK VC4_MASK(28, 16) ++#define SCALER6_POS0_HFLIP BIT(15) ++#define SCALER6_POS0_START_X_MASK VC4_MASK(12, 0) ++ ++#define SCALER6_CTL2_ALPHA_MODE_MASK VC4_MASK(31, 30) ++#define SCALER6_CTL2_ALPHA_PREMULT BIT(29) ++#define SCALER6_CTL2_ALPHA_MIX BIT(28) ++#define 
SCALER6_CTL2_BFG BIT(26) ++#define SCALER6_CTL2_CSC_ENABLE BIT(25) ++#define SCALER6_CTL2_BRCM_CFC_CONTROL_MASK VC4_MASK(18, 16) ++#define SCALER6_CTL2_ALPHA_MASK VC4_MASK(15, 4) ++ ++#define SCALER6_POS1_SCL_LINES_MASK VC4_MASK(28, 16) ++#define SCALER6_POS1_SCL_WIDTH_MASK VC4_MASK(12, 0) ++ ++#define SCALER6_POS2_SRC_LINES_MASK VC4_MASK(28, 16) ++#define SCALER6_POS2_SRC_WIDTH_MASK VC4_MASK(12, 0) ++ ++#define SCALER6_PTR0_VFLIP BIT(31) ++#define SCALER6_PTR0_UPM_BASE_MASK VC4_MASK(28, 16) ++#define SCALER6_PTR0_UPM_HANDLE_MASK VC4_MASK(14, 10) ++#define SCALER6_PTR0_UPM_BUFF_SIZE_MASK VC4_MASK(9, 8) ++#define SCALER6_PTR0_UPM_BUFF_SIZE_16_LINES 3 ++#define SCALER6_PTR0_UPM_BUFF_SIZE_8_LINES 2 ++#define SCALER6_PTR0_UPM_BUFF_SIZE_4_LINES 1 ++#define SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES 0 ++#define SCALER6_PTR0_UPPER_ADDR_MASK VC4_MASK(7, 0) ++ ++#define SCALER6_PTR2_ALPHA_BPP_MASK VC4_MASK(31, 31) ++#define SCALER6_PTR2_ALPHA_BPP_1BPP 1 ++#define SCALER6_PTR2_ALPHA_BPP_8BPP 0 ++#define SCALER6_PTR2_ALPHA_ORDER_MASK VC4_MASK(30, 30) ++#define SCALER6_PTR2_ALPHA_ORDER_MSB_TO_LSB 1 ++#define SCALER6_PTR2_ALPHA_ORDER_LSB_TO_MSB 0 ++#define SCALER6_PTR2_ALPHA_OFFS_MASK VC4_MASK(29, 27) ++#define SCALER6_PTR2_LSKIP_MASK VC4_MASK(26, 24) ++#define SCALER6_PTR2_PITCH_MASK VC4_MASK(16, 0) ++#define SCALER6_PTR2_FETCH_COUNT_MASK VC4_MASK(26, 16) ++ + #endif /* VC4_REGS_H */ |